From 160d45c251b5041b9688ff68c3a7c5e091a50989 Mon Sep 17 00:00:00 2001
From: Bryce Mecum <petridish@gmail.com>
Date: Mon, 13 Nov 2023 22:21:12 -0900
Subject: [PATCH 01/23] MINOR: [Docs] Tweak text on docs index page (#38695)

### Rationale for this change

These are just some minor style tweaks which made the text and buttons on these cards read more naturally to me. Feel free to ignore or take only some of the changes here.

### Are these changes tested?

Yes, I confirmed the docs still build as expected locally.

### Are there any user-facing changes?

New language in docs, see above.

Authored-by: Bryce Mecum <petridish@gmail.com>
Signed-off-by: AlenkaF <frim.alenka@gmail.com>
---
 docs/source/index.rst | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/docs/source/index.rst b/docs/source/index.rst
index d01c74f9a482e..8407813bd7abb 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -46,8 +46,8 @@ target environment.**
       :class-card: contrib-card
       :shadow: none
 
-      Read about the Apache Arrow format
-      specifications and Protocols.
+      Read about the Apache Arrow format and its related specifications and
+      protocols.
 
       +++
 
@@ -57,17 +57,15 @@ target environment.**
          :color: primary
          :expand:
 
-         To the Specifications
+         To Specifications
 
    .. grid-item-card:: Development
       :class-card: contrib-card
       :shadow: none
 
-      Find the documentation on the topic of
-      contributions, reviews, building of the libraries
-      from source, building of the documentation, 
-      continuous integration, benchmarks and the
-      release process.
+      Find documentation on building the libraries from source, building the
+      documentation, contributing and code reviews, continuous integration,
+      benchmarking, and the release process.
 
       +++
 
@@ -77,7 +75,7 @@ target environment.**
          :color: primary
          :expand:
 
-         To the Development
+         To Development
 
 .. _toc.columnar:
 

From a4080209a97a5d66accdeb71c5c1ffa982fed51e Mon Sep 17 00:00:00 2001
From: James Duong <james.duong@improving.com>
Date: Tue, 14 Nov 2023 05:10:36 -0800
Subject: [PATCH 02/23] GH-38662: [Java] Add comparators (#38669)

### Rationale for this change
Add missing Default VectorValueComparators for some more types.

### What changes are included in this PR?
Add comparators for:
- FixedSizeBinaryVector
- LargeListVector
- FixedSizeListVector
- NullVector

### Are these changes tested?
Yes, unit tests added.

### Are there any user-facing changes?
No
* Closes: #38662

Authored-by: James Duong <james.duong@improving.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 .../sort/DefaultVectorComparators.java        | 140 ++++++++++++++++--
 .../sort/TestDefaultVectorComparator.java     | 132 +++++++++++++++++
 2 files changed, 259 insertions(+), 13 deletions(-)

diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java
index 4f9c8b7d71bab..588876aa99059 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java
@@ -32,11 +32,13 @@
 import org.apache.arrow.vector.Decimal256Vector;
 import org.apache.arrow.vector.DecimalVector;
 import org.apache.arrow.vector.DurationVector;
+import org.apache.arrow.vector.FixedSizeBinaryVector;
 import org.apache.arrow.vector.Float4Vector;
 import org.apache.arrow.vector.Float8Vector;
 import org.apache.arrow.vector.IntVector;
 import org.apache.arrow.vector.IntervalDayVector;
 import org.apache.arrow.vector.IntervalMonthDayNanoVector;
+import org.apache.arrow.vector.NullVector;
 import org.apache.arrow.vector.SmallIntVector;
 import org.apache.arrow.vector.TimeMicroVector;
 import org.apache.arrow.vector.TimeMilliVector;
@@ -50,7 +52,9 @@
 import org.apache.arrow.vector.UInt8Vector;
 import org.apache.arrow.vector.ValueVector;
 import org.apache.arrow.vector.VariableWidthVector;
-import org.apache.arrow.vector.complex.BaseRepeatedValueVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.RepeatedValueVector;
+import org.apache.arrow.vector.holders.NullableFixedSizeBinaryHolder;
 
 /**
  * Default comparator implementations for different types of vectors.
@@ -111,13 +115,21 @@ public static <T extends ValueVector> VectorValueComparator<T> createDefaultComp
         return (VectorValueComparator<T>) new TimeSecComparator();
       } else if (vector instanceof TimeStampVector) {
         return (VectorValueComparator<T>) new TimeStampComparator();
+      } else if (vector instanceof FixedSizeBinaryVector) {
+        return (VectorValueComparator<T>) new FixedSizeBinaryComparator();
       }
     } else if (vector instanceof VariableWidthVector) {
       return (VectorValueComparator<T>) new VariableWidthComparator();
-    } else if (vector instanceof BaseRepeatedValueVector) {
+    } else if (vector instanceof RepeatedValueVector) {
       VectorValueComparator<?> innerComparator =
-              createDefaultComparator(((BaseRepeatedValueVector) vector).getDataVector());
+              createDefaultComparator(((RepeatedValueVector) vector).getDataVector());
       return new RepeatedValueComparator(innerComparator);
+    } else if (vector instanceof FixedSizeListVector) {
+      VectorValueComparator<?> innerComparator =
+          createDefaultComparator(((FixedSizeListVector) vector).getDataVector());
+      return new FixedSizeListComparator(innerComparator);
+    } else if (vector instanceof NullVector) {
+      return (VectorValueComparator<T>) new NullComparator();
     }
 
     throw new IllegalArgumentException("No default comparator for " + vector.getClass().getCanonicalName());
@@ -674,6 +686,61 @@ public VectorValueComparator<TimeStampVector> createNew() {
     }
   }
 
+  /**
+   * Default comparator for {@link org.apache.arrow.vector.FixedSizeBinaryVector}.
+   * The comparison is in lexicographic order, with null comes first.
+   */
+  public static class FixedSizeBinaryComparator extends VectorValueComparator<FixedSizeBinaryVector> {
+
+    @Override
+    public int compare(int index1, int index2) {
+      NullableFixedSizeBinaryHolder holder1 = new NullableFixedSizeBinaryHolder();
+      NullableFixedSizeBinaryHolder holder2 = new NullableFixedSizeBinaryHolder();
+      vector1.get(index1, holder1);
+      vector2.get(index2, holder2);
+
+      return ByteFunctionHelpers.compare(
+          holder1.buffer, 0, holder1.byteWidth, holder2.buffer, 0, holder2.byteWidth);
+    }
+
+    @Override
+    public int compareNotNull(int index1, int index2) {
+      NullableFixedSizeBinaryHolder holder1 = new NullableFixedSizeBinaryHolder();
+      NullableFixedSizeBinaryHolder holder2 = new NullableFixedSizeBinaryHolder();
+      vector1.get(index1, holder1);
+      vector2.get(index2, holder2);
+
+      return ByteFunctionHelpers.compare(
+          holder1.buffer, 0, holder1.byteWidth, holder2.buffer, 0, holder2.byteWidth);
+    }
+
+    @Override
+    public VectorValueComparator<FixedSizeBinaryVector> createNew() {
+      return new FixedSizeBinaryComparator();
+    }
+  }
+
+  /**
+   * Default comparator for {@link org.apache.arrow.vector.NullVector}.
+   */
+  public static class NullComparator extends VectorValueComparator<NullVector> {
+    @Override
+    public int compare(int index1, int index2) {
+      // Values are always equal (and are always null).
+      return 0;
+    }
+
+    @Override
+    public int compareNotNull(int index1, int index2) {
+      throw new AssertionError("Cannot compare non-null values in a NullVector.");
+    }
+
+    @Override
+    public VectorValueComparator<NullVector> createNew() {
+      return new NullComparator();
+    }
+  }
+
   /**
    * Default comparator for {@link org.apache.arrow.vector.VariableWidthVector}.
    * The comparison is in lexicographic order, with null comes first.
@@ -705,14 +772,14 @@ public VectorValueComparator<VariableWidthVector> createNew() {
   }
 
   /**
-   * Default comparator for {@link BaseRepeatedValueVector}.
+   * Default comparator for {@link RepeatedValueVector}.
    * It works by comparing the underlying vector in a lexicographic order.
    * @param <T> inner vector type.
    */
   public static class RepeatedValueComparator<T extends ValueVector>
-          extends VectorValueComparator<BaseRepeatedValueVector> {
+          extends VectorValueComparator<RepeatedValueVector> {
 
-    private VectorValueComparator<T> innerComparator;
+    private final VectorValueComparator<T> innerComparator;
 
     public RepeatedValueComparator(VectorValueComparator<T> innerComparator) {
       this.innerComparator = innerComparator;
@@ -720,16 +787,16 @@ public RepeatedValueComparator(VectorValueComparator<T> innerComparator) {
 
     @Override
     public int compareNotNull(int index1, int index2) {
-      int startIdx1 = vector1.getOffsetBuffer().getInt(index1 * OFFSET_WIDTH);
-      int startIdx2 = vector2.getOffsetBuffer().getInt(index2 * OFFSET_WIDTH);
+      int startIdx1 = vector1.getOffsetBuffer().getInt((long) index1 * OFFSET_WIDTH);
+      int startIdx2 = vector2.getOffsetBuffer().getInt((long) index2 * OFFSET_WIDTH);
 
-      int endIdx1 = vector1.getOffsetBuffer().getInt((index1 + 1) * OFFSET_WIDTH);
-      int endIdx2 = vector2.getOffsetBuffer().getInt((index2 + 1) * OFFSET_WIDTH);
+      int endIdx1 = vector1.getOffsetBuffer().getInt((long) (index1 + 1) * OFFSET_WIDTH);
+      int endIdx2 = vector2.getOffsetBuffer().getInt((long) (index2 + 1) * OFFSET_WIDTH);
 
       int length1 = endIdx1 - startIdx1;
       int length2 = endIdx2 - startIdx2;
 
-      int length = length1 < length2 ? length1 : length2;
+      int length = Math.min(length1, length2);
 
       for (int i = 0; i < length; i++) {
         int result = innerComparator.compare(startIdx1 + i, startIdx2 + i);
@@ -741,13 +808,60 @@ public int compareNotNull(int index1, int index2) {
     }
 
     @Override
-    public VectorValueComparator<BaseRepeatedValueVector> createNew() {
+    public VectorValueComparator<RepeatedValueVector> createNew() {
       VectorValueComparator<T> newInnerComparator = innerComparator.createNew();
       return new RepeatedValueComparator<>(newInnerComparator);
     }
 
     @Override
-    public void attachVectors(BaseRepeatedValueVector vector1, BaseRepeatedValueVector vector2) {
+    public void attachVectors(RepeatedValueVector vector1, RepeatedValueVector vector2) {
+      this.vector1 = vector1;
+      this.vector2 = vector2;
+
+      innerComparator.attachVectors((T) vector1.getDataVector(), (T) vector2.getDataVector());
+    }
+  }
+
+  /**
+   * Default comparator for {@link FixedSizeListVector}.
+   * It works by comparing the underlying vector in a lexicographic order.
+   * @param <T> inner vector type.
+   */
+  public static class FixedSizeListComparator<T extends ValueVector>
+      extends VectorValueComparator<FixedSizeListVector> {
+
+    private final VectorValueComparator<T> innerComparator;
+
+    public FixedSizeListComparator(VectorValueComparator<T> innerComparator) {
+      this.innerComparator = innerComparator;
+    }
+
+    @Override
+    public int compareNotNull(int index1, int index2) {
+      int length1 = vector1.getListSize();
+      int length2 = vector2.getListSize();
+
+      int length = Math.min(length1, length2);
+      int startIdx1 = vector1.getElementStartIndex(index1);
+      int startIdx2 = vector2.getElementStartIndex(index2);
+
+      for (int i = 0; i < length; i++) {
+        int result = innerComparator.compare(startIdx1 + i, startIdx2 + i);
+        if (result != 0) {
+          return result;
+        }
+      }
+      return length1 - length2;
+    }
+
+    @Override
+    public VectorValueComparator<FixedSizeListVector> createNew() {
+      VectorValueComparator<T> newInnerComparator = innerComparator.createNew();
+      return new FixedSizeListComparator<>(newInnerComparator);
+    }
+
+    @Override
+    public void attachVectors(FixedSizeListVector vector1, FixedSizeListVector vector2) {
       this.vector1 = vector1;
       this.vector2 = vector2;
 
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java
index bdae85110aa62..43c634b7647fb 100644
--- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java
@@ -31,12 +31,14 @@
 import org.apache.arrow.vector.Decimal256Vector;
 import org.apache.arrow.vector.DecimalVector;
 import org.apache.arrow.vector.DurationVector;
+import org.apache.arrow.vector.FixedSizeBinaryVector;
 import org.apache.arrow.vector.Float4Vector;
 import org.apache.arrow.vector.Float8Vector;
 import org.apache.arrow.vector.IntVector;
 import org.apache.arrow.vector.IntervalDayVector;
 import org.apache.arrow.vector.LargeVarBinaryVector;
 import org.apache.arrow.vector.LargeVarCharVector;
+import org.apache.arrow.vector.NullVector;
 import org.apache.arrow.vector.SmallIntVector;
 import org.apache.arrow.vector.TimeMicroVector;
 import org.apache.arrow.vector.TimeMilliVector;
@@ -52,6 +54,8 @@
 import org.apache.arrow.vector.ValueVector;
 import org.apache.arrow.vector.VarBinaryVector;
 import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.LargeListVector;
 import org.apache.arrow.vector.complex.ListVector;
 import org.apache.arrow.vector.testing.ValueVectorDataPopulator;
 import org.apache.arrow.vector.types.TimeUnit;
@@ -158,6 +162,61 @@ public void testCopiedComparatorForLists() {
     }
   }
 
+  private FixedSizeListVector createFixedSizeListVector(int count) {
+    FixedSizeListVector listVector = FixedSizeListVector.empty("list vector", count, allocator);
+    Types.MinorType type = Types.MinorType.INT;
+    listVector.addOrGetVector(FieldType.nullable(type.getType()));
+    listVector.allocateNew();
+
+    IntVector dataVector = (IntVector) listVector.getDataVector();
+
+    for (int i = 0; i < count; i++) {
+      dataVector.set(i, i);
+    }
+    dataVector.setValueCount(count);
+
+    listVector.setNotNull(0);
+    listVector.setValueCount(1);
+
+    return listVector;
+  }
+
+  @Test
+  public void testCompareFixedSizeLists() {
+    try (FixedSizeListVector listVector1 = createFixedSizeListVector(10);
+         FixedSizeListVector listVector2 = createFixedSizeListVector(11)) {
+      VectorValueComparator<FixedSizeListVector> comparator =
+          DefaultVectorComparators.createDefaultComparator(listVector1);
+      comparator.attachVectors(listVector1, listVector2);
+
+      // prefix is smaller
+      assertTrue(comparator.compare(0, 0) < 0);
+    }
+
+    try (FixedSizeListVector listVector1 = createFixedSizeListVector(11);
+         FixedSizeListVector listVector2 = createFixedSizeListVector(11)) {
+      ((IntVector) listVector2.getDataVector()).set(10, 110);
+
+      VectorValueComparator<FixedSizeListVector> comparator =
+          DefaultVectorComparators.createDefaultComparator(listVector1);
+      comparator.attachVectors(listVector1, listVector2);
+
+      // breaking tie by the last element
+      assertTrue(comparator.compare(0, 0) < 0);
+    }
+
+    try (FixedSizeListVector listVector1 = createFixedSizeListVector(10);
+         FixedSizeListVector listVector2 = createFixedSizeListVector(10)) {
+
+      VectorValueComparator<FixedSizeListVector> comparator =
+          DefaultVectorComparators.createDefaultComparator(listVector1);
+      comparator.attachVectors(listVector1, listVector2);
+
+      // list vector elements equal
+      assertTrue(comparator.compare(0, 0) == 0);
+    }
+  }
+
   @Test
   public void testCompareUInt1() {
     try (UInt1Vector vec = new UInt1Vector("", allocator)) {
@@ -845,6 +904,65 @@ public void testCompareTimeStamp() {
     }
   }
 
+  @Test
+  public void testCompareFixedSizeBinary() {
+    try (FixedSizeBinaryVector vector1 = new FixedSizeBinaryVector("test1", allocator, 2);
+         FixedSizeBinaryVector vector2 = new FixedSizeBinaryVector("test1", allocator, 3)) {
+      vector1.allocateNew();
+      vector2.allocateNew();
+      vector1.set(0, new byte[] {1, 1});
+      vector2.set(0, new byte[] {1, 1, 0});
+      VectorValueComparator<FixedSizeBinaryVector> comparator =
+          DefaultVectorComparators.createDefaultComparator(vector1);
+      comparator.attachVectors(vector1, vector2);
+
+      // prefix is smaller
+      assertTrue(comparator.compare(0, 0) < 0);
+    }
+
+    try (FixedSizeBinaryVector vector1 = new FixedSizeBinaryVector("test1", allocator, 3);
+         FixedSizeBinaryVector vector2 = new FixedSizeBinaryVector("test1", allocator, 3)) {
+      vector1.allocateNew();
+      vector2.allocateNew();
+      vector1.set(0, new byte[] {1, 1, 0});
+      vector2.set(0, new byte[] {1, 1, 1});
+      VectorValueComparator<FixedSizeBinaryVector> comparator =
+          DefaultVectorComparators.createDefaultComparator(vector1);
+      comparator.attachVectors(vector1, vector2);
+
+      // breaking tie by the last element
+      assertTrue(comparator.compare(0, 0) < 0);
+    }
+
+    try (FixedSizeBinaryVector vector1 = new FixedSizeBinaryVector("test1", allocator, 3);
+         FixedSizeBinaryVector vector2 = new FixedSizeBinaryVector("test1", allocator, 3)) {
+      vector1.allocateNew();
+      vector2.allocateNew();
+      vector1.set(0, new byte[] {1, 1, 1});
+      vector2.set(0, new byte[] {1, 1, 1});
+      VectorValueComparator<FixedSizeBinaryVector> comparator =
+          DefaultVectorComparators.createDefaultComparator(vector1);
+      comparator.attachVectors(vector1, vector2);
+
+      // fixed-size binary values equal
+      assertTrue(comparator.compare(0, 0) == 0);
+    }
+  }
+
+  @Test
+  public void testCompareNull() {
+    try (NullVector vec = new NullVector("test",
+        FieldType.notNullable(new ArrowType.Int(32, false)))) {
+      vec.setValueCount(2);
+
+      VectorValueComparator<NullVector> comparator =
+          DefaultVectorComparators.createDefaultComparator(vec);
+      comparator.attachVector(vec);
+      assertEquals(DefaultVectorComparators.NullComparator.class, comparator.getClass());
+      assertEquals(0, comparator.compare(0, 1));
+    }
+  }
+
   @Test
   public void testCheckNullsOnCompareIsFalseForNonNullableVector() {
     try (IntVector vec = new IntVector("not nullable",
@@ -937,4 +1055,18 @@ private static <V extends ValueVector> void verifyVariableWidthComparatorReturne
     VectorValueComparator<V> comparator = DefaultVectorComparators.createDefaultComparator(vec);
     assertEquals(DefaultVectorComparators.VariableWidthComparator.class, comparator.getClass());
   }
+
+  @Test
+  public void testRepeatedDefaultComparators() {
+    final FieldType type = FieldType.nullable(Types.MinorType.INT.getType());
+    try (final LargeListVector vector = new LargeListVector("list", allocator, type, null)) {
+      vector.addOrGetVector(FieldType.nullable(type.getType()));
+      verifyRepeatedComparatorReturned(vector);
+    }
+  }
+
+  private static <V extends ValueVector> void verifyRepeatedComparatorReturned(V vec) {
+    VectorValueComparator<V> comparator = DefaultVectorComparators.createDefaultComparator(vec);
+    assertEquals(DefaultVectorComparators.RepeatedValueComparator.class, comparator.getClass());
+  }
 }

From f3ec224ab6ace14f630509c79dfbba2ec32d881a Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Tue, 14 Nov 2023 14:25:29 +0100
Subject: [PATCH 03/23] GH-38626: [Python] Fix segfault when PyArrow is
 imported at shutdown (#38637)

### Rationale for this change

Some C++ destructors may be called after the Python interpreter has ceased to exist.
If such a destructor tries to call back in the Python interpreter, for example by calling `Py_DECREF`, we get a crash.

### What changes are included in this PR?

Protect `OwnedRef` and `OwnedRefNoGIL` destructors against decref'ing a Python object after Python finalization.

### Are these changes tested?

Yes.

### Are there any user-facing changes?

No.
* Closes: #38626

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 python/pyarrow/src/arrow/python/common.h | 17 ++++++++++-------
 python/pyarrow/tests/test_misc.py        | 13 +++++++++++++
 2 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/python/pyarrow/src/arrow/python/common.h b/python/pyarrow/src/arrow/python/common.h
index e36c0834fd424..bc567ef78e83a 100644
--- a/python/pyarrow/src/arrow/python/common.h
+++ b/python/pyarrow/src/arrow/python/common.h
@@ -188,7 +188,12 @@ class ARROW_PYTHON_EXPORT OwnedRef {
     return *this;
   }
 
-  ~OwnedRef() { reset(); }
+  ~OwnedRef() {
+    // GH-38626: destructor may be called after the Python interpreter is finalized.
+    if (Py_IsInitialized()) {
+      reset();
+    }
+  }
 
   void reset(PyObject* obj) {
     Py_XDECREF(obj_);
@@ -225,13 +230,11 @@ class ARROW_PYTHON_EXPORT OwnedRefNoGIL : public OwnedRef {
   explicit OwnedRefNoGIL(PyObject* obj) : OwnedRef(obj) {}
 
   ~OwnedRefNoGIL() {
-    // This destructor may be called after the Python interpreter is finalized.
-    // At least avoid spurious attempts to take the GIL when not necessary.
-    if (obj() == NULLPTR) {
-      return;
+    // GH-38626: destructor may be called after the Python interpreter is finalized.
+    if (Py_IsInitialized() && obj() != NULLPTR) {
+      PyAcquireGIL lock;
+      reset();
     }
-    PyAcquireGIL lock;
-    reset();
   }
 };
 
diff --git a/python/pyarrow/tests/test_misc.py b/python/pyarrow/tests/test_misc.py
index 9b9dfdd554806..a48ac0c3cd81a 100644
--- a/python/pyarrow/tests/test_misc.py
+++ b/python/pyarrow/tests/test_misc.py
@@ -117,6 +117,19 @@ def test_runtime_info():
         subprocess.check_call([sys.executable, "-c", code], env=env)
 
 
+def test_import_at_shutdown():
+    # GH-38626: importing PyArrow at interpreter shutdown would crash
+    code = """if 1:
+        import atexit
+
+        def import_arrow():
+            import pyarrow
+
+        atexit.register(import_arrow)
+        """
+    subprocess.check_call([sys.executable, "-c", code])
+
+
 @pytest.mark.skipif(sys.platform == "win32",
                     reason="Path to timezone database is not configurable "
                            "on non-Windows platforms")

From bb7ffaf0bd0661baba872c3fe1500369f26241bd Mon Sep 17 00:00:00 2001
From: Matt Topol <zotthewizard@gmail.com>
Date: Tue, 14 Nov 2023 10:43:28 -0500
Subject: [PATCH 04/23] GH-38477: [Go] Fixing decimal 128 rounding issue
 (#38478)

### Rationale for this change
Fixing an off-by-one rounding issue with decimal128 by ensuring proper precision handling.

### Are these changes tested?
The test case which reproduced the rounding issue has been added as a unit test.

* Closes: #38477

Authored-by: Matt Topol <zotthewizard@gmail.com>
Signed-off-by: Matt Topol <zotthewizard@gmail.com>
---
 go/arrow/decimal128/decimal128.go      | 4 ++--
 go/arrow/decimal128/decimal128_test.go | 9 +++++++++
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/go/arrow/decimal128/decimal128.go b/go/arrow/decimal128/decimal128.go
index 7bde39d327417..3b88dce1fa809 100644
--- a/go/arrow/decimal128/decimal128.go
+++ b/go/arrow/decimal128/decimal128.go
@@ -261,7 +261,7 @@ func FromString(v string, prec, scale int32) (n Num, err error) {
 	var precInBits = uint(math.Round(float64(prec+scale+1)/math.Log10(2))) + 1
 
 	var out *big.Float
-	out, _, err = big.ParseFloat(v, 10, 127, big.ToNearestEven)
+	out, _, err = big.ParseFloat(v, 10, 128, big.ToNearestEven)
 	if err != nil {
 		return
 	}
@@ -280,7 +280,7 @@ func FromString(v string, prec, scale int32) (n Num, err error) {
 		// (e.g. C++) handles Decimal values. So if we're negative we'll subtract 0.5 and if
 		// we're positive we'll add 0.5.
 		p := (&big.Float{}).SetInt(scaleMultipliers[scale].BigInt())
-		out.Mul(out, p).SetPrec(precInBits)
+		out.SetPrec(precInBits).Mul(out, p)
 		if out.Signbit() {
 			out.Sub(out, pt5)
 		} else {
diff --git a/go/arrow/decimal128/decimal128_test.go b/go/arrow/decimal128/decimal128_test.go
index 879f2849028f8..4cfd7db20db08 100644
--- a/go/arrow/decimal128/decimal128_test.go
+++ b/go/arrow/decimal128/decimal128_test.go
@@ -24,6 +24,7 @@ import (
 
 	"github.com/apache/arrow/go/v15/arrow/decimal128"
 	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
 )
 
 func TestFromU64(t *testing.T) {
@@ -698,3 +699,11 @@ func TestBitLen(t *testing.T) {
 	_, err = decimal128.FromString(b.String(), decimal128.MaxPrecision, -1)
 	assert.ErrorContains(t, err, "bitlen too large for decimal128")
 }
+
+func TestFromStringDecimal128b(t *testing.T) {
+	const decStr = "9323406071781562130.6457232358109488923"
+
+	num, err := decimal128.FromString(decStr, 38, 19)
+	require.NoError(t, err)
+	assert.Equal(t, decStr, num.ToString(19))
+}

From cd0d7f53b3ab7dfac7a3477751a87586d4da3782 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Tue, 14 Nov 2023 17:31:08 +0100
Subject: [PATCH 05/23] MINOR: [Python] Fix name of new keyword in the
 concat_tables future warning (#38710)

We renamed the new keyword in a final iteration of the PR, but apparently forgot to update the warning message.

Authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Signed-off-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 python/pyarrow/table.pxi | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index bbf60416de995..e55a0d1dd54cb 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -5226,7 +5226,8 @@ def concat_tables(tables, MemoryPool memory_pool=None, str promote_options="none
 
     if "promote" in kwargs:
         warnings.warn(
-            "promote has been superseded by mode='default'.", FutureWarning, stacklevel=2)
+            "promote has been superseded by promote_options='default'.",
+            FutureWarning, stacklevel=2)
         if kwargs['promote'] is True:
             promote_options = "default"
 

From 26149d9fab0360e6d4d9a295f934100470c4bc37 Mon Sep 17 00:00:00 2001
From: Matt Topol <zotthewizard@gmail.com>
Date: Tue, 14 Nov 2023 11:44:19 -0500
Subject: [PATCH 06/23] GH-38718: [Go][Format][Integration] Add
 StringView/BinaryView to Go implementation (#35769)

### Rationale for this change
See #35628 for the rationale and description of the StringView/BinaryView array types.

This change is adding Go as a second implementation of it.

### What changes are included in this PR?

Add Array Types for `StringView` and `BinaryView` along with `StringViewType` and `BinaryViewType` and necessary enums and builders. These arrays can be round tripped through JSON and IPC.

### Are these changes tested?
Yes, unit tests have been added and integration tests run

* Closes: [#38718](https://github.com/apache/arrow/issues/38718)
* Closes: #38718

Lead-authored-by: Matt Topol <zotthewizard@gmail.com>
Co-authored-by: Alex Shcherbakov <candiduslynx@users.noreply.github.com>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 .gitattributes                                |   3 +
 docs/source/status.rst                        |   4 +
 format/Schema.fbs                             |   2 +-
 go/arrow/array/array.go                       |   3 +-
 go/arrow/array/binary.go                      | 121 +++++++
 go/arrow/array/binary_test.go                 |  24 ++
 go/arrow/array/binarybuilder.go               | 329 ++++++++++++++++++
 go/arrow/array/bufferbuilder.go               | 108 ++++++
 go/arrow/array/builder.go                     |   4 +
 go/arrow/array/compare.go                     |  12 +
 go/arrow/array/concat.go                      |  30 +-
 go/arrow/array/concat_test.go                 |   3 +
 go/arrow/array/string.go                      | 196 ++++++++++-
 go/arrow/array/string_test.go                 | 173 +++++++++
 go/arrow/compute/executor.go                  |   5 +-
 go/arrow/datatype.go                          |   7 +
 go/arrow/datatype_binary.go                   |  41 +++
 go/arrow/datatype_binary_test.go              |  30 ++
 go/arrow/datatype_viewheader.go               | 141 ++++++++
 go/arrow/datatype_viewheader_inline.go        |  31 ++
 go/arrow/datatype_viewheader_inline_go1.19.go |  35 ++
 go/arrow/datatype_viewheader_inline_tinygo.go |  35 ++
 go/arrow/internal/arrdata/arrdata.go          |  81 +++++
 go/arrow/internal/arrjson/arrjson.go          | 150 ++++++++
 go/arrow/internal/arrjson/arrjson_test.go     | 259 ++++++++++++++
 go/arrow/internal/flatbuf/MetadataVersion.go  |   2 +-
 .../internal/testing/gen/random_array_gen.go  |  34 ++
 go/arrow/ipc/endian_swap.go                   |   4 +
 go/arrow/ipc/file_reader.go                   |  38 +-
 go/arrow/ipc/message.go                       |  10 +-
 go/arrow/ipc/metadata.go                      |  40 ++-
 go/arrow/ipc/writer.go                        |  36 +-
 go/arrow/type_traits_view.go                  |  53 +++
 33 files changed, 2011 insertions(+), 33 deletions(-)
 create mode 100644 go/arrow/datatype_viewheader.go
 create mode 100644 go/arrow/datatype_viewheader_inline.go
 create mode 100644 go/arrow/datatype_viewheader_inline_go1.19.go
 create mode 100644 go/arrow/datatype_viewheader_inline_tinygo.go
 create mode 100644 go/arrow/type_traits_view.go

diff --git a/.gitattributes b/.gitattributes
index 69f4139c4e4f4..70007c26c8b9b 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -3,6 +3,9 @@ cpp/src/generated/*.cpp linguist-generated=true
 cpp/src/generated/*.h linguist-generated=true
 go/**/*.s linguist-generated=true
 go/arrow/unionmode_string.go linguist-generated=true
+go/arrow/internal/flatbuf/*.go linguist-generated=true
+go/**/*.pb.go linguist-generated=true
+go/parquet/internal/gen-go/parquet/*.go linguist-generated=true
 r/R/RcppExports.R linguist-generated=true
 r/R/arrowExports.R linguist-generated=true
 r/src/RcppExports.cpp linguist-generated=true
diff --git a/docs/source/status.rst b/docs/source/status.rst
index c8c0e6dfc1dfe..c059ab3cef971 100644
--- a/docs/source/status.rst
+++ b/docs/source/status.rst
@@ -68,6 +68,10 @@ Data Types
 +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
 | Large Utf8        | ✓     | ✓     | ✓     |            |       |  ✓    | ✓     |       |
 +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
+| Binary View       | ✓     |       | ✓     |            |       |       |       |       |
++-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
+| String View       | ✓     |       | ✓     |            |       |       |       |       |
++-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
 
 +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
 | Data type         | C++   | Java  | Go    | JavaScript | C#    | Rust  | Julia | Swift |
diff --git a/format/Schema.fbs b/format/Schema.fbs
index 6adbcb115cde3..dbf482e6cc786 100644
--- a/format/Schema.fbs
+++ b/format/Schema.fbs
@@ -40,7 +40,7 @@ enum MetadataVersion:short {
   /// >= 0.8.0 (December 2017). Non-backwards compatible with V3.
   V4,
 
-  /// >= 1.0.0 (July 2020. Backwards compatible with V4 (V5 readers can read V4
+  /// >= 1.0.0 (July 2020). Backwards compatible with V4 (V5 readers can read V4
   /// metadata and IPC messages). Implementations are recommended to provide a
   /// V4 compatibility mode with V5 format changes disabled.
   ///
diff --git a/go/arrow/array/array.go b/go/arrow/array/array.go
index bbe301ee661f3..5aacc8f99a4ee 100644
--- a/go/arrow/array/array.go
+++ b/go/arrow/array/array.go
@@ -178,7 +178,8 @@ func init() {
 		arrow.RUN_END_ENCODED:         func(data arrow.ArrayData) arrow.Array { return NewRunEndEncodedData(data) },
 		arrow.LIST_VIEW:               func(data arrow.ArrayData) arrow.Array { return NewListViewData(data) },
 		arrow.LARGE_LIST_VIEW:         func(data arrow.ArrayData) arrow.Array { return NewLargeListViewData(data) },
-
+		arrow.BINARY_VIEW:             func(data arrow.ArrayData) arrow.Array { return NewBinaryViewData(data) },
+		arrow.STRING_VIEW:             func(data arrow.ArrayData) arrow.Array { return NewStringViewData(data) },
 		// invalid data types to fill out array to size 2^6 - 1
 		63: invalidDataType,
 	}
diff --git a/go/arrow/array/binary.go b/go/arrow/array/binary.go
index bf27139fddbaa..c226297da04c6 100644
--- a/go/arrow/array/binary.go
+++ b/go/arrow/array/binary.go
@@ -24,6 +24,7 @@ import (
 	"unsafe"
 
 	"github.com/apache/arrow/go/v15/arrow"
+	"github.com/apache/arrow/go/v15/arrow/memory"
 	"github.com/apache/arrow/go/v15/internal/json"
 )
 
@@ -318,6 +319,126 @@ func arrayEqualLargeBinary(left, right *LargeBinary) bool {
 	return true
 }
 
+type ViewLike interface {
+	arrow.Array
+	ValueHeader(int) *arrow.ViewHeader
+}
+
+type BinaryView struct {
+	array
+	values      []arrow.ViewHeader
+	dataBuffers []*memory.Buffer
+}
+
+func NewBinaryViewData(data arrow.ArrayData) *BinaryView {
+	a := &BinaryView{}
+	a.refCount = 1
+	a.setData(data.(*Data))
+	return a
+}
+
+// setData binds the array to data: buffers[1] is viewed as the headers, buffers[2:] are the data buffers.
+func (a *BinaryView) setData(data *Data) {
+	if len(data.buffers) < 2 {
+		panic("len(data.buffers) < 2")
+	}
+	a.array.setData(data)
+	if valueData := data.buffers[1]; valueData != nil {
+		a.values = arrow.ViewHeaderTraits.CastFromBytes(valueData.Bytes())
+	}
+	// everything after the header buffer holds the out-of-line bytes that Value looks up by buffer index
+	a.dataBuffers = data.buffers[2:]
+}
+
+func (a *BinaryView) ValueHeader(i int) *arrow.ViewHeader {
+	if i < 0 || i >= a.array.data.length {
+		panic("arrow/array: index out of range")
+	}
+	return &a.values[a.array.data.offset+i]
+}
+
+func (a *BinaryView) Value(i int) []byte {
+	s := a.ValueHeader(i)
+	if s.IsInline() {
+		return s.InlineBytes()
+	}
+	start := s.BufferOffset()
+	buf := a.dataBuffers[s.BufferIndex()]
+	return buf.Bytes()[start : start+int32(s.Len())]
+}
+
+// ValueString returns the value at index i as a string instead of
+// a byte slice, without copying the underlying data.
+func (a *BinaryView) ValueString(i int) string {
+	b := a.Value(i)
+	return *(*string)(unsafe.Pointer(&b))
+}
+
+func (a *BinaryView) String() string {
+	var o strings.Builder
+	o.WriteString("[")
+	for i := 0; i < a.Len(); i++ {
+		if i > 0 {
+			o.WriteString(" ")
+		}
+		switch {
+		case a.IsNull(i):
+			o.WriteString(NullValueStr)
+		default:
+			fmt.Fprintf(&o, "%q", a.ValueString(i))
+		}
+	}
+	o.WriteString("]")
+	return o.String()
+}
+
+// ValueStr is paired with AppendValueFromString in that it returns
+// the value at index i as a string: Semantically this means that for
+// a null value it will return the string "(null)", otherwise it will
+// return the value as a base64 encoded string suitable for CSV/JSON.
+//
+// This is always going to be less performant than just using ValueString
+// and exists to fulfill the Array interface to provide a method which
+// can produce a human readable string for a given index.
+func (a *BinaryView) ValueStr(i int) string {
+	if a.IsNull(i) {
+		return NullValueStr
+	}
+	return base64.StdEncoding.EncodeToString(a.Value(i))
+}
+
+func (a *BinaryView) GetOneForMarshal(i int) interface{} {
+	if a.IsNull(i) {
+		return nil
+	}
+	return a.Value(i)
+}
+
+func (a *BinaryView) MarshalJSON() ([]byte, error) {
+	vals := make([]interface{}, a.Len())
+	for i := 0; i < a.Len(); i++ {
+		vals[i] = a.GetOneForMarshal(i)
+	}
+	// golang marshal standard says that []byte will be marshalled
+	// as a base64-encoded string
+	return json.Marshal(vals)
+}
+
+func arrayEqualBinaryView(left, right *BinaryView) bool {
+	leftBufs, rightBufs := left.dataBuffers, right.dataBuffers
+	for i := 0; i < left.Len(); i++ {
+		if left.IsNull(i) {
+			continue
+		}
+		if !left.ValueHeader(i).Equals(leftBufs, right.ValueHeader(i), rightBufs) {
+			return false
+		}
+	}
+	return true
+}
+
 var (
 	_ arrow.Array = (*Binary)(nil)
+	_ arrow.Array = (*LargeBinary)(nil)
+	_ arrow.Array = (*BinaryView)(nil)
 )
diff --git a/go/arrow/array/binary_test.go b/go/arrow/array/binary_test.go
index 9c1770950a8b5..c9e165515225b 100644
--- a/go/arrow/array/binary_test.go
+++ b/go/arrow/array/binary_test.go
@@ -700,3 +700,27 @@ func TestBinaryStringRoundTrip(t *testing.T) {
 
 	assert.True(t, Equal(arr, arr1))
 }
+
+func TestBinaryViewStringRoundTrip(t *testing.T) {
+	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
+	defer mem.AssertSize(t, 0)
+
+	values := []string{"a", "bc", "", "", "supercalifragilistic", "", "expeallodocious"}
+	valid := []bool{true, true, false, false, true, true, true}
+
+	b := NewBinaryViewBuilder(mem)
+	defer b.Release()
+
+	b.AppendStringValues(values, valid)
+	arr := b.NewArray().(*BinaryView)
+	defer arr.Release()
+
+	for i := 0; i < arr.Len(); i++ {
+		assert.NoError(t, b.AppendValueFromString(arr.ValueStr(i)))
+	}
+
+	arr1 := b.NewArray().(*BinaryView)
+	defer arr1.Release()
+
+	assert.True(t, Equal(arr, arr1))
+}
diff --git a/go/arrow/array/binarybuilder.go b/go/arrow/array/binarybuilder.go
index a51bc799e4965..21ad576508e9e 100644
--- a/go/arrow/array/binarybuilder.go
+++ b/go/arrow/array/binarybuilder.go
@@ -23,6 +23,7 @@ import (
 	"math"
 	"reflect"
 	"sync/atomic"
+	"unsafe"
 
 	"github.com/apache/arrow/go/v15/arrow"
 	"github.com/apache/arrow/go/v15/arrow/internal/debug"
@@ -370,6 +371,334 @@ func (b *BinaryBuilder) UnmarshalJSON(data []byte) error {
 	return b.Unmarshal(dec)
 }
 
+const (
+	dfltBlockSize            = 32 << 10 // 32 KB
+	viewValueSizeLimit int32 = math.MaxInt32
+)
+
+type BinaryViewBuilder struct {
+	builder
+	dtype arrow.BinaryDataType
+
+	data    *memory.Buffer
+	rawData []arrow.ViewHeader
+
+	blockBuilder multiBufferBuilder
+}
+
+func NewBinaryViewBuilder(mem memory.Allocator) *BinaryViewBuilder {
+	return &BinaryViewBuilder{
+		dtype: arrow.BinaryTypes.BinaryView,
+		builder: builder{
+			refCount: 1,
+			mem:      mem,
+		},
+		blockBuilder: multiBufferBuilder{
+			refCount:  1,
+			blockSize: dfltBlockSize,
+			mem:       mem,
+		},
+	}
+}
+
+func (b *BinaryViewBuilder) SetBlockSize(sz uint) {
+	b.blockBuilder.blockSize = int(sz)
+}
+
+func (b *BinaryViewBuilder) Type() arrow.DataType { return b.dtype }
+
+// Release drops one reference; at zero it frees the bitmap, the header buffer and any data blocks.
+func (b *BinaryViewBuilder) Release() {
+	debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases")
+	if atomic.AddInt64(&b.refCount, -1) != 0 {
+		return
+	}
+	if b.nullBitmap != nil {
+		b.nullBitmap.Release()
+		b.nullBitmap = nil
+	}
+	if b.data != nil {
+		b.data.Release()
+		b.data = nil
+		b.rawData = nil
+	}
+	b.blockBuilder.Reset() // blocks filled by Append but never turned into an array would otherwise leak
+}
+
+func (b *BinaryViewBuilder) init(capacity int) {
+	b.builder.init(capacity)
+	b.data = memory.NewResizableBuffer(b.mem)
+	bytesN := arrow.ViewHeaderTraits.BytesRequired(capacity)
+	b.data.Resize(bytesN)
+	b.rawData = arrow.ViewHeaderTraits.CastFromBytes(b.data.Bytes())
+}
+
+func (b *BinaryViewBuilder) Resize(n int) {
+	nbuild := n
+	if n < minBuilderCapacity {
+		n = minBuilderCapacity
+	}
+
+	if b.capacity == 0 {
+		b.init(n)
+		return
+	}
+
+	b.builder.resize(nbuild, b.init)
+	b.data.Resize(arrow.ViewHeaderTraits.BytesRequired(n))
+	b.rawData = arrow.ViewHeaderTraits.CastFromBytes(b.data.Bytes())
+}
+
+// ReserveData ensures a data block with room for length out-of-line bytes; it panics above the 2GB view limit.
+func (b *BinaryViewBuilder) ReserveData(length int) {
+	if int64(length) > int64(viewValueSizeLimit) { // compare before narrowing: int32(length) wraps and never exceeds the limit
+		panic(fmt.Errorf("%w: BinaryView or StringView elements cannot reference strings larger than 2GB", arrow.ErrInvalid))
+	}
+	b.blockBuilder.Reserve(length)
+}
+
+func (b *BinaryViewBuilder) Reserve(n int) {
+	b.builder.reserve(n, b.Resize)
+}
+
+// Append adds v as a new valid element, copying it into a data block when it is too long to be stored inline.
+func (b *BinaryViewBuilder) Append(v []byte) {
+	// Compare in 64 bits: int32(len(v)) would wrap for oversized input and never trip the limit.
+	if int64(len(v)) > int64(viewValueSizeLimit) {
+		panic(fmt.Errorf("%w: BinaryView or StringView elements cannot reference strings larger than 2GB", arrow.ErrInvalid))
+	}
+	if !arrow.IsViewInline(len(v)) {
+		b.ReserveData(len(v))
+	}
+	b.Reserve(1)
+	b.UnsafeAppend(v)
+}
+
+// AppendString is identical to Append, only accepting a string instead
+// of a byte slice, avoiding the extra copy that would occur if you simply
+// did []byte(v).
+//
+// This is different than AppendValueFromString which exists for the
+// Builder interface, in that this expects raw binary data which is
+// appended unmodified. AppendValueFromString expects base64 encoded binary
+// data instead.
+func (b *BinaryViewBuilder) AppendString(v string) {
+	// create a []byte without copying the bytes
+	// in go1.20 this would be unsafe.StringData
+	val := *(*[]byte)(unsafe.Pointer(&struct {
+		string
+		int
+	}{v, len(v)}))
+	b.Append(val)
+}
+
+func (b *BinaryViewBuilder) AppendNull() {
+	b.Reserve(1)
+	b.UnsafeAppendBoolToBitmap(false)
+}
+
+func (b *BinaryViewBuilder) AppendNulls(n int) {
+	b.Reserve(n)
+	for i := 0; i < n; i++ {
+		b.UnsafeAppendBoolToBitmap(false)
+	}
+}
+
+func (b *BinaryViewBuilder) AppendEmptyValue() {
+	b.Reserve(1)
+	b.UnsafeAppendBoolToBitmap(true)
+}
+
+func (b *BinaryViewBuilder) AppendEmptyValues(n int) {
+	b.Reserve(n)
+	b.unsafeAppendBoolsToBitmap(nil, n)
+}
+
+func (b *BinaryViewBuilder) UnsafeAppend(v []byte) {
+	hdr := &b.rawData[b.length]
+	hdr.SetBytes(v)
+	if !hdr.IsInline() {
+		b.blockBuilder.UnsafeAppend(hdr, v)
+	}
+	b.UnsafeAppendBoolToBitmap(true)
+}
+
+// AppendValues appends each element of v; valid, when non-empty, marks which entries are non-null.
+func (b *BinaryViewBuilder) AppendValues(v [][]byte, valid []bool) {
+	if len(v) != len(valid) && len(valid) != 0 {
+		panic("len(v) != len(valid) && len(valid) != 0")
+	}
+	if len(v) == 0 {
+		return
+	}
+	b.Reserve(len(v))
+	// First pass: total the bytes of the valid values that are too long to inline.
+	outOfLineTotal := 0
+	for i, vv := range v {
+		if len(valid) == 0 || valid[i] {
+			if !arrow.IsViewInline(len(vv)) {
+				outOfLineTotal += len(vv)
+			}
+		}
+	}
+	// Second pass: one reservation covers every out-of-line value, then fill the headers.
+	b.ReserveData(outOfLineTotal)
+	for i, vv := range v {
+		if len(valid) == 0 || valid[i] {
+			hdr := &b.rawData[b.length+i]
+			hdr.SetBytes(vv)
+			if !hdr.IsInline() {
+				b.blockBuilder.UnsafeAppend(hdr, vv)
+			}
+		}
+	}
+	// Headers of null slots are not written by this method; only the bitmap call below covers them.
+	b.builder.unsafeAppendBoolsToBitmap(valid, len(v))
+}
+
+// AppendStringValues is AppendValues for string input; valid, when non-empty, marks the non-null entries.
+func (b *BinaryViewBuilder) AppendStringValues(v []string, valid []bool) {
+	if len(v) != len(valid) && len(valid) != 0 {
+		panic("len(v) != len(valid) && len(valid) != 0")
+	}
+	if len(v) == 0 {
+		return
+	}
+	b.Reserve(len(v))
+	// First pass: total the bytes of the valid values that are too long to inline.
+	outOfLineTotal := 0
+	for i, vv := range v {
+		if len(valid) == 0 || valid[i] {
+			if !arrow.IsViewInline(len(vv)) {
+				outOfLineTotal += len(vv)
+			}
+		}
+	}
+	// Second pass: one reservation covers every out-of-line value, then fill the headers.
+	b.ReserveData(outOfLineTotal)
+	for i, vv := range v {
+		if len(valid) == 0 || valid[i] {
+			hdr := &b.rawData[b.length+i]
+			hdr.SetString(vv)
+			if !hdr.IsInline() {
+				b.blockBuilder.UnsafeAppendString(hdr, vv)
+			}
+		}
+	}
+	// Headers of null slots are not written by this method; only the bitmap call below covers them.
+	b.builder.unsafeAppendBoolsToBitmap(valid, len(v))
+}
+
+// AppendValueFromString is paired with ValueStr for fulfilling the
+// base Builder interface. This is intended to read in a human-readable
+// string such as from CSV or JSON and append it to the array.
+//
+// NullValueStr appends a null. Otherwise a UTF-8 typed builder appends s as-is,
+// and a binary one expects base64 and appends the decoded bytes.
+func (b *BinaryViewBuilder) AppendValueFromString(s string) error {
+	if s == NullValueStr {
+		b.AppendNull()
+		return nil
+	}
+
+	if b.dtype.IsUtf8() {
+		b.Append([]byte(s))
+		return nil
+	}
+
+	decodedVal, err := base64.StdEncoding.DecodeString(s)
+	if err != nil {
+		return fmt.Errorf("could not decode base64 string: %w", err)
+	}
+	b.Append(decodedVal)
+	return nil
+}
+
+func (b *BinaryViewBuilder) UnmarshalOne(dec *json.Decoder) error {
+	t, err := dec.Token()
+	if err != nil {
+		return err
+	}
+
+	switch v := t.(type) {
+	case string:
+		data, err := base64.StdEncoding.DecodeString(v)
+		if err != nil {
+			return err
+		}
+		b.Append(data)
+	case []byte:
+		b.Append(v)
+	case nil:
+		b.AppendNull()
+	default:
+		return &json.UnmarshalTypeError{
+			Value:  fmt.Sprint(t),
+			Type:   reflect.TypeOf([]byte{}),
+			Offset: dec.InputOffset(),
+		}
+	}
+	return nil
+}
+
+func (b *BinaryViewBuilder) Unmarshal(dec *json.Decoder) error {
+	for dec.More() {
+		if err := b.UnmarshalOne(dec); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// UnmarshalJSON appends every element of the JSON array in data to the builder.
+func (b *BinaryViewBuilder) UnmarshalJSON(data []byte) error {
+	dec := json.NewDecoder(bytes.NewReader(data))
+	t, err := dec.Token()
+	if err != nil {
+		return err
+	}
+	// Print the token itself: when it is not a json.Delim, delim is only the zero value.
+	if delim, ok := t.(json.Delim); !ok || delim != '[' {
+		return fmt.Errorf("binary view builder must unpack from json array, found %v", t)
+	}
+	return b.Unmarshal(dec)
+}
+
+// newData packs the bitmap, the view headers and every data block into a Data and resets the builder.
+func (b *BinaryViewBuilder) newData() (data *Data) {
+	bytesRequired := arrow.ViewHeaderTraits.BytesRequired(b.length)
+	if bytesRequired > 0 && bytesRequired < b.data.Len() {
+		b.data.Resize(bytesRequired) // trim buffers
+	}
+	dataBuffers := b.blockBuilder.Finish()
+	data = NewData(b.dtype, b.length, append([]*memory.Buffer{
+		b.nullBitmap, b.data}, dataBuffers...), nil, b.nulls, 0)
+	b.reset()
+	if b.data != nil {
+		b.data.Release()
+		b.data = nil
+		b.rawData = nil
+	}
+	// Drop the builder's references to the data blocks even when no header was ever
+	// appended (ReserveData without Append); otherwise those blocks are never freed.
+	for _, buf := range dataBuffers {
+		buf.Release()
+	}
+	return
+}
+
+func (b *BinaryViewBuilder) NewBinaryViewArray() (a *BinaryView) {
+	data := b.newData()
+	a = NewBinaryViewData(data)
+	data.Release()
+	return
+}
+
+func (b *BinaryViewBuilder) NewArray() arrow.Array {
+	return b.NewBinaryViewArray()
+}
+
 var (
 	_ Builder = (*BinaryBuilder)(nil)
+	_ Builder = (*BinaryViewBuilder)(nil)
 )
diff --git a/go/arrow/array/bufferbuilder.go b/go/arrow/array/bufferbuilder.go
index cb381e25b32a2..13741ba8926ac 100644
--- a/go/arrow/array/bufferbuilder.go
+++ b/go/arrow/array/bufferbuilder.go
@@ -18,7 +18,9 @@ package array
 
 import (
 	"sync/atomic"
+	"unsafe"
 
+	"github.com/apache/arrow/go/v15/arrow"
 	"github.com/apache/arrow/go/v15/arrow/bitutil"
 	"github.com/apache/arrow/go/v15/arrow/internal/debug"
 	"github.com/apache/arrow/go/v15/arrow/memory"
@@ -151,3 +153,109 @@ func (b *bufferBuilder) unsafeAppend(data []byte) {
 	copy(b.bytes[b.length:], data)
 	b.length += len(data)
 }
+
+type multiBufferBuilder struct {
+	refCount  int64 // reference count, updated atomically by Retain/Release
+	blockSize int   // minimum capacity reserved for each newly allocated block
+
+	mem              memory.Allocator // allocator backing every block
+	blocks           []*memory.Buffer // data blocks, handed to the caller by Finish
+	currentOutBuffer int              // index in blocks that UnsafeAppend writes to
+}
+
+// Retain increases the reference count by 1.
+// Retain may be called simultaneously from multiple goroutines.
+func (b *multiBufferBuilder) Retain() {
+	atomic.AddInt64(&b.refCount, 1)
+}
+
+// Release decreases the reference count by 1.
+// When the reference count goes to zero, the memory is freed.
+// Release may be called simultaneously from multiple goroutines.
+func (b *multiBufferBuilder) Release() {
+	debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases")
+
+	if atomic.AddInt64(&b.refCount, -1) == 0 {
+		b.Reset()
+	}
+}
+
+// Reserve makes currentOutBuffer point at a block with at least nbytes of unused capacity,
+// reusing an existing block when one fits and otherwise allocating max(nbytes, blockSize).
+func (b *multiBufferBuilder) Reserve(nbytes int) {
+	if len(b.blocks) == 0 {
+		out := memory.NewResizableBuffer(b.mem)
+		if nbytes < b.blockSize {
+			nbytes = b.blockSize
+		}
+		out.Reserve(nbytes)
+		b.currentOutBuffer = 0
+		b.blocks = []*memory.Buffer{out}
+		return
+	}
+	// nothing to do when the current block still has room
+	curBuf := b.blocks[b.currentOutBuffer]
+	remain := curBuf.Cap() - curBuf.Len()
+	if nbytes <= remain {
+		return
+	}
+	// search for underfull block that has enough bytes
+	for i, block := range b.blocks {
+		remaining := block.Cap() - block.Len()
+		if nbytes <= remaining {
+			b.currentOutBuffer = i
+			return
+		}
+	}
+
+	// current buffer doesn't have enough space, no underfull buffers
+	// make new buffer and set that as our current.
+	newBuf := memory.NewResizableBuffer(b.mem)
+	if nbytes < b.blockSize {
+		nbytes = b.blockSize
+	}
+	newBuf.Reserve(nbytes)
+	b.currentOutBuffer = len(b.blocks)
+	b.blocks = append(b.blocks, newBuf)
+}
+
+// RemainingBytes reports the unused capacity of the current block, or 0 when no block exists.
+func (b *multiBufferBuilder) RemainingBytes() int {
+	if len(b.blocks) == 0 {
+		return 0
+	}
+	cur := b.blocks[b.currentOutBuffer]
+	return cur.Cap() - cur.Len()
+}
+
+// Reset releases every block; Finish detaches them and rewinds currentOutBuffer to 0.
+func (b *multiBufferBuilder) Reset() {
+	for _, block := range b.Finish() {
+		block.Release()
+	}
+}
+
+// UnsafeAppend copies val to the end of the current block and stores that block's index and the write offset in hdr.
+func (b *multiBufferBuilder) UnsafeAppend(hdr *arrow.ViewHeader, val []byte) {
+	// No capacity check happens here; presumably callers Reserve first (as BinaryViewBuilder.Append does).
+	blockIdx := b.currentOutBuffer
+	block, start := b.blocks[blockIdx], b.blocks[blockIdx].Len()
+	hdr.SetIndexOffset(int32(blockIdx), int32(start))
+	block.ResizeNoShrink(start + copy(block.Buf()[start:], val))
+}
+
+func (b *multiBufferBuilder) UnsafeAppendString(hdr *arrow.ViewHeader, val string) {
+	// create a byte slice with zero-copies
+	// in go1.20 this would be equivalent to unsafe.StringData
+	v := *(*[]byte)(unsafe.Pointer(&struct {
+		string
+		int
+	}{val, len(val)}))
+	b.UnsafeAppend(hdr, v)
+}
+
+// Finish hands all blocks to the caller and leaves the builder empty with currentOutBuffer at 0.
+func (b *multiBufferBuilder) Finish() (out []*memory.Buffer) {
+	out, b.blocks, b.currentOutBuffer = b.blocks, nil, 0
+	return
+}
diff --git a/go/arrow/array/builder.go b/go/arrow/array/builder.go
index bb15298e03ccf..279804a1cdb9f 100644
--- a/go/arrow/array/builder.go
+++ b/go/arrow/array/builder.go
@@ -364,6 +364,10 @@ func NewBuilder(mem memory.Allocator, dtype arrow.DataType) Builder {
 	case arrow.RUN_END_ENCODED:
 		typ := dtype.(*arrow.RunEndEncodedType)
 		return NewRunEndEncodedBuilder(mem, typ.RunEnds(), typ.Encoded())
+	case arrow.BINARY_VIEW:
+		return NewBinaryViewBuilder(mem)
+	case arrow.STRING_VIEW:
+		return NewStringViewBuilder(mem)
 	}
 	panic(fmt.Errorf("arrow/array: unsupported builder for %T", dtype))
 }
diff --git a/go/arrow/array/compare.go b/go/arrow/array/compare.go
index 778de41e32c67..372293a61d6cb 100644
--- a/go/arrow/array/compare.go
+++ b/go/arrow/array/compare.go
@@ -232,6 +232,12 @@ func Equal(left, right arrow.Array) bool {
 	case *LargeString:
 		r := right.(*LargeString)
 		return arrayEqualLargeString(l, r)
+	case *BinaryView:
+		r := right.(*BinaryView)
+		return arrayEqualBinaryView(l, r)
+	case *StringView:
+		r := right.(*StringView)
+		return arrayEqualStringView(l, r)
 	case *Int8:
 		r := right.(*Int8)
 		return arrayEqualInt8(l, r)
@@ -482,6 +488,12 @@ func arrayApproxEqual(left, right arrow.Array, opt equalOption) bool {
 	case *LargeString:
 		r := right.(*LargeString)
 		return arrayEqualLargeString(l, r)
+	case *BinaryView:
+		r := right.(*BinaryView)
+		return arrayEqualBinaryView(l, r)
+	case *StringView:
+		r := right.(*StringView)
+		return arrayEqualStringView(l, r)
 	case *Int8:
 		r := right.(*Int8)
 		return arrayEqualInt8(l, r)
diff --git a/go/arrow/array/concat.go b/go/arrow/array/concat.go
index 53c5be06895b9..fa3554c1c0555 100644
--- a/go/arrow/array/concat.go
+++ b/go/arrow/array/concat.go
@@ -600,6 +600,35 @@ func concat(data []arrow.ArrayData, mem memory.Allocator) (arr arrow.ArrayData,
 		}
 	case arrow.FixedWidthDataType:
 		out.buffers[1] = concatBuffers(gatherBuffersFixedWidthType(data, 1, dt), mem)
+	case arrow.BinaryViewDataType:
+		out.buffers = out.buffers[:2]
+		for _, d := range data {
+			for _, buf := range d.Buffers()[2:] {
+				buf.Retain()
+				out.buffers = append(out.buffers, buf)
+			}
+		}
+
+		out.buffers[1] = concatBuffers(gatherFixedBuffers(data, 1, arrow.ViewHeaderSizeBytes), mem)
+
+		var (
+			s                  = arrow.ViewHeaderTraits.CastFromBytes(out.buffers[1].Bytes())
+			i                  = data[0].Len()
+			precedingBufsCount int
+		)
+
+		for idx := 1; idx < len(data); idx++ {
+			precedingBufsCount += len(data[idx-1].Buffers()) - 2
+
+			for end := i + data[idx].Len(); i < end; i++ {
+				if s[i].IsInline() {
+					continue
+				}
+
+				bufIndex := s[i].BufferIndex() + int32(precedingBufsCount)
+				s[i].SetIndexOffset(bufIndex, s[i].BufferOffset())
+			}
+		}
 	case arrow.BinaryDataType:
 		offsetWidth := dt.Layout().Buffers[1].ByteWidth
 		offsetBuffer, valueRanges, err := concatOffsets(gatherFixedBuffers(data, 1, offsetWidth), offsetWidth, mem)
@@ -739,7 +768,6 @@ func concat(data []arrow.ArrayData, mem memory.Allocator) (arr arrow.ArrayData,
 			out.childData[0].Release()
 			return nil, err
 		}
-
 	default:
 		return nil, fmt.Errorf("concatenate not implemented for type %s", dt)
 	}
diff --git a/go/arrow/array/concat_test.go b/go/arrow/array/concat_test.go
index 1cc484ad1a923..7b22d97a41e00 100644
--- a/go/arrow/array/concat_test.go
+++ b/go/arrow/array/concat_test.go
@@ -84,6 +84,7 @@ func TestConcatenate(t *testing.T) {
 		{arrow.StructOf()},
 		{arrow.MapOf(arrow.PrimitiveTypes.Uint16, arrow.PrimitiveTypes.Int8)},
 		{&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int32, ValueType: arrow.PrimitiveTypes.Float64}},
+		{arrow.BinaryTypes.StringView},
 	}
 
 	for _, tt := range tests {
@@ -150,6 +151,8 @@ func (cts *ConcatTestSuite) generateArr(size int64, nullprob float64) arrow.Arra
 		return cts.rng.String(size, 0, 15, nullprob)
 	case arrow.LARGE_STRING:
 		return cts.rng.LargeString(size, 0, 15, nullprob)
+	case arrow.STRING_VIEW:
+		return cts.rng.StringView(size, 0, 20, nullprob)
 	case arrow.LIST:
 		valuesSize := size * 4
 		values := cts.rng.Int8(valuesSize, 0, 127, nullprob).(*array.Int8)
diff --git a/go/arrow/array/string.go b/go/arrow/array/string.go
index 9ab7c938ef5d8..90a4628f0d0fb 100644
--- a/go/arrow/array/string.go
+++ b/go/arrow/array/string.go
@@ -28,6 +28,11 @@ import (
 	"github.com/apache/arrow/go/v15/internal/json"
 )
 
+type StringLike interface {
+	arrow.Array
+	Value(int) string
+}
+
 // String represents an immutable sequence of variable-length UTF-8 strings.
 type String struct {
 	array
@@ -310,6 +315,108 @@ func arrayEqualLargeString(left, right *LargeString) bool {
 	return true
 }
 
+type StringView struct {
+	array
+	values      []arrow.ViewHeader
+	dataBuffers []*memory.Buffer
+}
+
+func NewStringViewData(data arrow.ArrayData) *StringView {
+	a := &StringView{}
+	a.refCount = 1
+	a.setData(data.(*Data))
+	return a
+}
+
+// Reset resets the StringView with a different set of Data.
+func (a *StringView) Reset(data arrow.ArrayData) {
+	a.setData(data.(*Data))
+}
+
+// setData binds the array to data: buffers[1] is viewed as the headers, buffers[2:] are the data buffers.
+func (a *StringView) setData(data *Data) {
+	if len(data.buffers) < 2 {
+		panic("len(data.buffers) < 2")
+	}
+	a.array.setData(data)
+	if valueData := data.buffers[1]; valueData != nil {
+		a.values = arrow.ViewHeaderTraits.CastFromBytes(valueData.Bytes())
+	}
+	// everything after the header buffer holds the out-of-line bytes that Value looks up by buffer index
+	a.dataBuffers = data.buffers[2:]
+}
+
+func (a *StringView) ValueHeader(i int) *arrow.ViewHeader {
+	if i < 0 || i >= a.array.data.length {
+		panic("arrow/array: index out of range")
+	}
+	return &a.values[a.array.data.offset+i]
+}
+
+func (a *StringView) Value(i int) string {
+	s := a.ValueHeader(i)
+	if s.IsInline() {
+		return s.InlineString()
+	}
+	start := s.BufferOffset()
+	buf := a.dataBuffers[s.BufferIndex()]
+	value := buf.Bytes()[start : start+int32(s.Len())]
+	return *(*string)(unsafe.Pointer(&value))
+}
+
+func (a *StringView) String() string {
+	var o strings.Builder
+	o.WriteString("[")
+	for i := 0; i < a.Len(); i++ {
+		if i > 0 {
+			o.WriteString(" ")
+		}
+		switch {
+		case a.IsNull(i):
+			o.WriteString(NullValueStr)
+		default:
+			fmt.Fprintf(&o, "%q", a.Value(i))
+		}
+	}
+	o.WriteString("]")
+	return o.String()
+}
+
+func (a *StringView) ValueStr(i int) string {
+	if a.IsNull(i) {
+		return NullValueStr
+	}
+	return a.Value(i)
+}
+
+func (a *StringView) GetOneForMarshal(i int) interface{} {
+	if a.IsNull(i) {
+		return nil
+	}
+	return a.Value(i)
+}
+
+func (a *StringView) MarshalJSON() ([]byte, error) {
+	vals := make([]interface{}, a.Len())
+	for i := 0; i < a.Len(); i++ {
+		vals[i] = a.GetOneForMarshal(i)
+	}
+	return json.Marshal(vals)
+}
+
+func arrayEqualStringView(left, right *StringView) bool {
+	leftBufs, rightBufs := left.dataBuffers, right.dataBuffers
+	for i := 0; i < left.Len(); i++ {
+		if left.IsNull(i) {
+			continue
+		}
+		if !left.ValueHeader(i).Equals(leftBufs, right.ValueHeader(i), rightBufs) {
+			return false
+		}
+	}
+	return true
+}
+
 // A StringBuilder is used to build a String array using the Append methods.
 type StringBuilder struct {
 	*BinaryBuilder
@@ -344,10 +451,6 @@ func (b *StringBuilder) Value(i int) string {
 	return string(b.BinaryBuilder.Value(i))
 }
 
-// func (b *StringBuilder) UnsafeAppend(v string) {
-// 	b.BinaryBuilder.UnsafeAppend([]byte(v))
-// }
-
 // NewArray creates a String array from the memory buffers used by the builder and resets the StringBuilder
 // so it can be used to build a new array.
 func (b *StringBuilder) NewArray() arrow.Array {
@@ -441,10 +544,6 @@ func (b *LargeStringBuilder) Value(i int) string {
 	return string(b.BinaryBuilder.Value(i))
 }
 
-// func (b *LargeStringBuilder) UnsafeAppend(v string) {
-// 	b.BinaryBuilder.UnsafeAppend([]byte(v))
-// }
-
 // NewArray creates a String array from the memory buffers used by the builder and resets the StringBuilder
 // so it can be used to build a new array.
 func (b *LargeStringBuilder) NewArray() arrow.Array {
@@ -504,9 +603,87 @@ func (b *LargeStringBuilder) UnmarshalJSON(data []byte) error {
 	return b.Unmarshal(dec)
 }
 
+type StringViewBuilder struct {
+	*BinaryViewBuilder
+}
+
+func NewStringViewBuilder(mem memory.Allocator) *StringViewBuilder {
+	bldr := &StringViewBuilder{
+		BinaryViewBuilder: NewBinaryViewBuilder(mem),
+	}
+	bldr.dtype = arrow.BinaryTypes.StringView
+	return bldr
+}
+
+func (b *StringViewBuilder) Append(v string) {
+	b.BinaryViewBuilder.AppendString(v)
+}
+
+func (b *StringViewBuilder) AppendValues(v []string, valid []bool) {
+	b.BinaryViewBuilder.AppendStringValues(v, valid)
+}
+
+// UnmarshalOne appends the next JSON token: a string as a value, null as a null; anything else is a type error.
+func (b *StringViewBuilder) UnmarshalOne(dec *json.Decoder) error {
+	t, err := dec.Token()
+	if err != nil {
+		return err
+	}
+	switch v := t.(type) {
+	case string:
+		b.Append(v)
+	case []byte:
+		b.BinaryViewBuilder.Append(v)
+	case nil:
+		b.AppendNull()
+	default:
+		return &json.UnmarshalTypeError{
+			Value:  fmt.Sprint(t),
+			Type:   reflect.TypeOf(""), // the Go type this builder stores is string, not []byte
+			Offset: dec.InputOffset(),
+		}
+	}
+	return nil
+}
+
+func (b *StringViewBuilder) Unmarshal(dec *json.Decoder) error {
+	for dec.More() {
+		if err := b.UnmarshalOne(dec); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// UnmarshalJSON appends every element of the JSON array in data to the builder.
+func (b *StringViewBuilder) UnmarshalJSON(data []byte) error {
+	dec := json.NewDecoder(bytes.NewReader(data))
+	t, err := dec.Token()
+	if err != nil {
+		return err
+	}
+	// Print the token itself: when it is not a json.Delim, delim is only the zero value.
+	if delim, ok := t.(json.Delim); !ok || delim != '[' {
+		return fmt.Errorf("string view builder must unpack from json array, found %v", t)
+	}
+	return b.Unmarshal(dec)
+}
+
+func (b *StringViewBuilder) NewArray() arrow.Array {
+	return b.NewStringViewArray()
+}
+
+func (b *StringViewBuilder) NewStringViewArray() (a *StringView) {
+	data := b.newData()
+	a = NewStringViewData(data)
+	data.Release()
+	return
+}
+
 type StringLikeBuilder interface {
 	Builder
 	Append(string)
+	AppendValues([]string, []bool)
 	UnsafeAppend([]byte)
 	ReserveData(int)
 }
@@ -514,8 +691,11 @@ type StringLikeBuilder interface {
 var (
 	_ arrow.Array       = (*String)(nil)
 	_ arrow.Array       = (*LargeString)(nil)
+	_ arrow.Array       = (*StringView)(nil)
 	_ Builder           = (*StringBuilder)(nil)
 	_ Builder           = (*LargeStringBuilder)(nil)
+	_ Builder           = (*StringViewBuilder)(nil)
 	_ StringLikeBuilder = (*StringBuilder)(nil)
 	_ StringLikeBuilder = (*LargeStringBuilder)(nil)
+	_ StringLikeBuilder = (*StringViewBuilder)(nil)
 )
diff --git a/go/arrow/array/string_test.go b/go/arrow/array/string_test.go
index d743a3ec7f37f..803fae51347c1 100644
--- a/go/arrow/array/string_test.go
+++ b/go/arrow/array/string_test.go
@@ -619,3 +619,176 @@ func TestStringValueLen(t *testing.T) {
 		assert.Equal(t, len(v), slice.ValueLen(i))
 	}
 }
+func TestStringViewArray(t *testing.T) {
+	mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
+	defer mem.AssertSize(t, 0)
+
+	var (
+		// only the last string is long enough to not get inlined
+		want   = []string{"hello", "世界", "", "say goodbye daffy"}
+		valids = []bool{true, true, false, true}
+	)
+
+	sb := array.NewStringViewBuilder(mem)
+	defer sb.Release()
+
+	sb.Retain()
+	sb.Release()
+
+	assert.NoError(t, sb.AppendValueFromString(want[0]))
+	sb.AppendValues(want[1:2], nil)
+
+	sb.AppendNull()
+	sb.Append(want[3])
+
+	if got, want := sb.Len(), len(want); got != want {
+		t.Fatalf("invalid len: got=%d, want=%d", got, want)
+	}
+
+	if got, want := sb.NullN(), 1; got != want {
+		t.Fatalf("invalid nulls: got=%d, want=%d", got, want)
+	}
+
+	arr := sb.NewStringViewArray()
+	defer arr.Release()
+
+	arr.Retain()
+	arr.Release()
+
+	assert.Equal(t, "hello", arr.ValueStr(0))
+
+	if got, want := arr.Len(), len(want); got != want {
+		t.Fatalf("invalid len: got=%d, want=%d", got, want)
+	}
+
+	if got, want := arr.NullN(), 1; got != want {
+		t.Fatalf("invalid nulls: got=%d, want=%d", got, want)
+	}
+
+	for i := range want {
+		if arr.IsNull(i) != !valids[i] {
+			t.Fatalf("arr[%d]-validity: got=%v want=%v", i, !arr.IsNull(i), valids[i])
+		}
+		switch {
+		case arr.IsNull(i):
+		default:
+			got := arr.Value(i)
+			if got != want[i] {
+				t.Fatalf("arr[%d]: got=%q, want=%q", i, got, want[i])
+			}
+		}
+	}
+
+	sub := array.MakeFromData(arr.Data())
+	defer sub.Release()
+
+	if sub.DataType().ID() != arrow.STRING_VIEW {
+		t.Fatalf("invalid type: got=%q, want=string view", sub.DataType().Name())
+	}
+
+	if _, ok := sub.(*array.StringView); !ok {
+		t.Fatalf("could not type-assert to array.String")
+	}
+
+	if got, want := arr.String(), `["hello" "世界" (null) "say goodbye daffy"]`; got != want {
+		t.Fatalf("got=%q, want=%q", got, want)
+	}
+
+	// only the last string gets stuck into a buffer the rest are inlined
+	// in the headers.
+	if !bytes.Equal([]byte(`say goodbye daffy`), arr.Data().Buffers()[2].Bytes()) {
+		t.Fatalf("got=%q, want=%q", string(arr.Data().Buffers()[2].Bytes()), `say goodbye daffy`)
+	}
+
+	// check the prefix for the non-inlined value
+	if [4]byte{'s', 'a', 'y', ' '} != arr.ValueHeader(3).Prefix() {
+		t.Fatalf("got=%q, want=%q", arr.ValueHeader(3).Prefix(), `say `)
+	}
+
+	slice := array.NewSliceData(arr.Data(), 2, 4)
+	defer slice.Release()
+
+	sub1 := array.MakeFromData(slice)
+	defer sub1.Release()
+
+	v, ok := sub1.(*array.StringView)
+	if !ok {
+		t.Fatalf("could not type-assert to array.StringView")
+	}
+
+	if got, want := v.String(), `[(null) "say goodbye daffy"]`; got != want {
+		t.Fatalf("got=%q, want=%q", got, want)
+	}
+
+	if !bytes.Equal([]byte(`say goodbye daffy`), v.Data().Buffers()[2].Bytes()) {
+		t.Fatalf("got=%q, want=%q", string(v.Data().Buffers()[2].Bytes()), `say goodbye daffy`)
+	}
+
+	// check the prefix for the non-inlined value
+	if [4]byte{'s', 'a', 'y', ' '} != v.ValueHeader(1).Prefix() {
+		t.Fatalf("got=%q, want=%q", v.ValueHeader(1).Prefix(), `say `)
+	}
+}
+
+func TestStringViewBuilder_Empty(t *testing.T) {
+	mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
+	defer mem.AssertSize(t, 0)
+
+	want := []string{"hello", "世界", "", "say goodbye daffy"}
+
+	ab := array.NewStringViewBuilder(mem)
+	defer ab.Release()
+
+	stringValues := func(a *array.StringView) []string {
+		vs := make([]string, a.Len())
+		for i := range vs {
+			vs[i] = a.Value(i)
+		}
+		return vs
+	}
+
+	ab.AppendValues([]string{}, nil)
+	a := ab.NewStringViewArray()
+	assert.Zero(t, a.Len())
+	a.Release()
+
+	ab.AppendValues(nil, nil)
+	a = ab.NewStringViewArray()
+	assert.Zero(t, a.Len())
+	a.Release()
+
+	ab.AppendValues([]string{}, nil)
+	ab.AppendValues(want, nil)
+	a = ab.NewStringViewArray()
+	assert.Equal(t, want, stringValues(a))
+	a.Release()
+
+	ab.AppendValues(want, nil)
+	ab.AppendValues([]string{}, nil)
+	a = ab.NewStringViewArray()
+	assert.Equal(t, want, stringValues(a))
+	a.Release()
+}
+
+// TestStringViewReset tests the Reset() method on the StringView type by creating two different
+// StringViews and then resetting the contents of string2 with the values from string1.
+func TestStringViewReset(t *testing.T) {
+	mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
+	sb1 := array.NewStringViewBuilder(mem)
+	sb2 := array.NewStringViewBuilder(mem)
+	defer sb1.Release()
+	defer sb2.Release()
+
+	sb1.Append("string1")
+	sb1.AppendNull()
+
+	var (
+		string1 = sb1.NewStringViewArray()
+		string2 = sb2.NewStringViewArray()
+
+		string1Data = string1.Data()
+	)
+	string2.Reset(string1Data)
+
+	assert.Equal(t, "string1", string2.Value(0))
+}
diff --git a/go/arrow/compute/executor.go b/go/arrow/compute/executor.go
index 1cba0b1e19f69..db89b206daf5f 100644
--- a/go/arrow/compute/executor.go
+++ b/go/arrow/compute/executor.go
@@ -171,6 +171,8 @@ func addComputeDataPrealloc(dt arrow.DataType, widths []bufferPrealloc) []buffer
 		return append(widths, bufferPrealloc{bitWidth: 32, addLen: 1})
 	case arrow.LARGE_BINARY, arrow.LARGE_STRING, arrow.LARGE_LIST:
 		return append(widths, bufferPrealloc{bitWidth: 64, addLen: 1})
+	case arrow.STRING_VIEW, arrow.BINARY_VIEW:
+		return append(widths, bufferPrealloc{bitWidth: arrow.ViewHeaderSizeBytes * 8})
 	}
 	return widths
 }
@@ -1007,9 +1009,10 @@ func (v *vectorExecutor) WrapResults(ctx context.Context, out <-chan Datum, hasC
 	case <-ctx.Done():
 		return nil
 	case output = <-out:
-		if output == nil {
+		if output == nil || ctx.Err() != nil {
 			return nil
 		}
+
 		// if the inputs contained at least one chunked array
 		// then we want to return chunked output
 		if hasChunked {
diff --git a/go/arrow/datatype.go b/go/arrow/datatype.go
index 24113b55899dc..1e5d8fb98aa59 100644
--- a/go/arrow/datatype.go
+++ b/go/arrow/datatype.go
@@ -210,6 +210,11 @@ type BinaryDataType interface {
 	binary()
 }
 
+type BinaryViewDataType interface {
+	BinaryDataType
+	view()
+}
+
 type OffsetsDataType interface {
 	DataType
 	OffsetTypeTraits() OffsetTraits
@@ -272,6 +277,8 @@ func (b BufferSpec) Equals(other BufferSpec) bool {
 type DataTypeLayout struct {
 	Buffers []BufferSpec
 	HasDict bool
+	// VariadicSpec is what the buffers beyond len(Buffers) are expected to conform to.
+	VariadicSpec *BufferSpec
 }
 
 func SpecFixedWidth(w int) BufferSpec { return BufferSpec{KindFixedWidth, w} }
diff --git a/go/arrow/datatype_binary.go b/go/arrow/datatype_binary.go
index a3a8568645052..f3e601f08ec79 100644
--- a/go/arrow/datatype_binary.go
+++ b/go/arrow/datatype_binary.go
@@ -83,16 +83,57 @@ func (t *LargeStringType) Layout() DataTypeLayout {
 func (t *LargeStringType) OffsetTypeTraits() OffsetTraits { return Int64Traits }
 func (LargeStringType) IsUtf8() bool                      { return true }
 
+type BinaryViewType struct{}
+
+func (*BinaryViewType) ID() Type              { return BINARY_VIEW }
+func (*BinaryViewType) Name() string          { return "binary_view" }
+func (*BinaryViewType) String() string        { return "binary_view" }
+func (*BinaryViewType) IsUtf8() bool          { return false }
+func (*BinaryViewType) binary()               {}
+func (*BinaryViewType) view()                 {}
+func (t *BinaryViewType) Fingerprint() string { return typeFingerprint(t) }
+func (*BinaryViewType) Layout() DataTypeLayout {
+	variadic := SpecVariableWidth()
+	return DataTypeLayout{
+		Buffers:      []BufferSpec{SpecBitmap(), SpecFixedWidth(ViewHeaderSizeBytes)},
+		VariadicSpec: &variadic,
+	}
+}
+
+type StringViewType struct{}
+
+func (*StringViewType) ID() Type              { return STRING_VIEW }
+func (*StringViewType) Name() string          { return "string_view" }
+func (*StringViewType) String() string        { return "string_view" }
+func (*StringViewType) IsUtf8() bool          { return true }
+func (*StringViewType) binary()               {}
+func (*StringViewType) view()                 {}
+func (t *StringViewType) Fingerprint() string { return typeFingerprint(t) }
+func (*StringViewType) Layout() DataTypeLayout {
+	variadic := SpecVariableWidth()
+	return DataTypeLayout{
+		Buffers:      []BufferSpec{SpecBitmap(), SpecFixedWidth(ViewHeaderSizeBytes)},
+		VariadicSpec: &variadic,
+	}
+}
+
 var (
 	BinaryTypes = struct {
 		Binary      BinaryDataType
 		String      BinaryDataType
 		LargeBinary BinaryDataType
 		LargeString BinaryDataType
+		BinaryView  BinaryDataType
+		StringView  BinaryDataType
 	}{
 		Binary:      &BinaryType{},
 		String:      &StringType{},
 		LargeBinary: &LargeBinaryType{},
 		LargeString: &LargeStringType{},
+		BinaryView:  &BinaryViewType{},
+		StringView:  &StringViewType{},
 	}
+
+	_ BinaryViewDataType = (*StringViewType)(nil)
+	_ BinaryViewDataType = (*BinaryViewType)(nil)
 )
diff --git a/go/arrow/datatype_binary_test.go b/go/arrow/datatype_binary_test.go
index 25ba6e8db4ba4..083d69ee3e5d4 100644
--- a/go/arrow/datatype_binary_test.go
+++ b/go/arrow/datatype_binary_test.go
@@ -81,3 +81,33 @@ func TestLargeStringType(t *testing.T) {
 		t.Fatalf("invalid string type stringer. got=%v, want=%v", got, want)
 	}
 }
+
+func TestBinaryViewType(t *testing.T) {
+	var nt *arrow.BinaryViewType // nil receiver is fine: ID/Name/String never dereference it
+	if got, want := nt.ID(), arrow.BINARY_VIEW; got != want {
+		t.Fatalf("invalid binary view type id. got=%v, want=%v", got, want)
+	}
+
+	if got, want := nt.Name(), "binary_view"; got != want {
+		t.Fatalf("invalid binary view type name. got=%v, want=%v", got, want)
+	}
+
+	if got, want := nt.String(), "binary_view"; got != want {
+		t.Fatalf("invalid binary view type stringer. got=%v, want=%v", got, want)
+	}
+}
+
+func TestStringViewType(t *testing.T) {
+	var nt *arrow.StringViewType
+	if got, want := nt.ID(), arrow.STRING_VIEW; got != want {
+		t.Fatalf("invalid string type id. got=%v, want=%v", got, want)
+	}
+
+	if got, want := nt.Name(), "string_view"; got != want {
+		t.Fatalf("invalid string type name. got=%v, want=%v", got, want)
+	}
+
+	if got, want := nt.String(), "string_view"; got != want {
+		t.Fatalf("invalid string type stringer. got=%v, want=%v", got, want)
+	}
+}
diff --git a/go/arrow/datatype_viewheader.go b/go/arrow/datatype_viewheader.go
new file mode 100644
index 0000000000000..54b9256b34604
--- /dev/null
+++ b/go/arrow/datatype_viewheader.go
@@ -0,0 +1,141 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package arrow
+
+import (
+	"bytes"
+	"unsafe"
+
+	"github.com/apache/arrow/go/v15/arrow/endian"
+	"github.com/apache/arrow/go/v15/arrow/internal/debug"
+	"github.com/apache/arrow/go/v15/arrow/memory"
+)
+
+const (
+	ViewPrefixLen  = 4
+	viewInlineSize = 12
+)
+
+func IsViewInline(length int) bool { // reports whether a value of this byte length is stored inline in the header
+	return length <= viewInlineSize // inclusive: a 12-byte value fits inline, matching ViewHeader.IsInline
+}
+
+// ViewHeader is a variable length string (utf8) or byte slice with
+// a 4 byte prefix and inline optimization for small values (12 bytes
+// or fewer). This is similar to Go's standard string but limited by
+// a length of Uint32Max and up to the first four bytes of the string
+// are copied into the struct. This prefix allows failing comparisons
+// early and can reduce CPU cache working set when dealing with short
+// strings.
+//
+// There are two situations:
+//
+//		Entirely inlined string data
+//	                |----|------------|
+//		                ^    ^
+//		                |    |
+//		              size  inline string data, zero padded
+//
+//		Reference into buffer
+//	                |----|----|----|----|
+//		                ^    ^     ^     ^
+//		                |    |     |     |
+//		              size prefix buffer index and offset to out-of-line portion
+//
+// Adapted from TU Munich's UmbraDB [1], Velox, DuckDB.
+//
+// [1]: https://db.in.tum.de/~freitag/papers/p29-neumann-cidr20.pdf
+type ViewHeader struct {
+	size int32
+	// the first 4 bytes of this are the prefix for the value.
+	// if size <= viewInlineSize, then the entire value
+	// is in the data array and is zero padded.
+	// if size > viewInlineSize, the next 8 bytes are two 32-bit
+	// values which are the buffer index and offset in that buffer
+	// containing the full value (see BufferIndex and BufferOffset).
+	data [viewInlineSize]byte
+}
+
+func (sh *ViewHeader) IsInline() bool {
+	return sh.size <= int32(viewInlineSize)
+}
+
+func (sh *ViewHeader) Len() int { return int(sh.size) }
+func (sh *ViewHeader) Prefix() [ViewPrefixLen]byte {
+	return *(*[4]byte)(unsafe.Pointer(&sh.data))
+}
+
+func (sh *ViewHeader) BufferIndex() int32 {
+	return int32(endian.Native.Uint32(sh.data[ViewPrefixLen:]))
+}
+
+func (sh *ViewHeader) BufferOffset() int32 {
+	return int32(endian.Native.Uint32(sh.data[ViewPrefixLen+4:]))
+}
+
+func (sh *ViewHeader) InlineBytes() (data []byte) {
+	debug.Assert(sh.IsInline(), "calling InlineBytes on non-inline ViewHeader")
+	return sh.data[:sh.size]
+}
+
+func (sh *ViewHeader) SetBytes(data []byte) int {
+	sh.size = int32(len(data))
+	if sh.IsInline() {
+		return copy(sh.data[:], data)
+	}
+	return copy(sh.data[:4], data)
+}
+
+func (sh *ViewHeader) SetString(data string) int {
+	sh.size = int32(len(data))
+	if sh.IsInline() {
+		return copy(sh.data[:], data)
+	}
+	return copy(sh.data[:4], data)
+}
+
+func (sh *ViewHeader) SetIndexOffset(bufferIndex, offset int32) {
+	endian.Native.PutUint32(sh.data[ViewPrefixLen:], uint32(bufferIndex))
+	endian.Native.PutUint32(sh.data[ViewPrefixLen+4:], uint32(offset))
+}
+
+func (sh *ViewHeader) Equals(buffers []*memory.Buffer, other *ViewHeader, otherBuffers []*memory.Buffer) bool {
+	if sh.sizeAndPrefixAsInt64() != other.sizeAndPrefixAsInt64() {
+		return false
+	}
+
+	if sh.IsInline() {
+		return sh.inlinedAsInt64() == other.inlinedAsInt64()
+	}
+
+	return bytes.Equal(sh.getBufferBytes(buffers), other.getBufferBytes(otherBuffers))
+}
+
+func (sh *ViewHeader) getBufferBytes(buffers []*memory.Buffer) []byte {
+	offset := sh.BufferOffset()
+	return buffers[sh.BufferIndex()].Bytes()[offset : offset+sh.size]
+}
+
+func (sh *ViewHeader) inlinedAsInt64() int64 {
+	s := unsafe.Slice((*int64)(unsafe.Pointer(sh)), 2)
+	return s[1]
+}
+
+func (sh *ViewHeader) sizeAndPrefixAsInt64() int64 {
+	s := unsafe.Slice((*int64)(unsafe.Pointer(sh)), 2)
+	return s[0]
+}
diff --git a/go/arrow/datatype_viewheader_inline.go b/go/arrow/datatype_viewheader_inline.go
new file mode 100644
index 0000000000000..89ac1d06adcdf
--- /dev/null
+++ b/go/arrow/datatype_viewheader_inline.go
@@ -0,0 +1,31 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//go:build go1.20
+
+package arrow
+
+import (
+	"unsafe"
+
+	"github.com/apache/arrow/go/v15/arrow/internal/debug"
+)
+
+func (sh *ViewHeader) InlineString() (data string) {
+	debug.Assert(sh.IsInline(), "calling InlineString on non-inline ViewHeader")
+
+	return unsafe.String((*byte)(unsafe.Pointer(&sh.data)), sh.size)
+}
diff --git a/go/arrow/datatype_viewheader_inline_go1.19.go b/go/arrow/datatype_viewheader_inline_go1.19.go
new file mode 100644
index 0000000000000..aec66009d9492
--- /dev/null
+++ b/go/arrow/datatype_viewheader_inline_go1.19.go
@@ -0,0 +1,35 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//go:build !go1.20 && !tinygo
+
+package arrow
+
+import (
+	"reflect"
+	"unsafe"
+
+	"github.com/apache/arrow/go/v15/arrow/internal/debug"
+)
+
+func (sh *ViewHeader) InlineString() (data string) {
+	debug.Assert(sh.IsInline(), "calling InlineString on non-inline ViewHeader")
+
+	h := (*reflect.StringHeader)(unsafe.Pointer(&data))
+	h.Data = uintptr(unsafe.Pointer(&sh.data))
+	h.Len = int(sh.size)
+	return
+}
diff --git a/go/arrow/datatype_viewheader_inline_tinygo.go b/go/arrow/datatype_viewheader_inline_tinygo.go
new file mode 100644
index 0000000000000..bff63a273a722
--- /dev/null
+++ b/go/arrow/datatype_viewheader_inline_tinygo.go
@@ -0,0 +1,35 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//go:build !go1.20 && tinygo
+
+package arrow
+
+import (
+	"reflect"
+	"unsafe"
+
+	"github.com/apache/arrow/go/v15/arrow/internal/debug"
+)
+
+func (sh *ViewHeader) InlineString() (data string) {
+	debug.Assert(sh.IsInline(), "calling InlineString on non-inline ViewHeader")
+
+	h := (*reflect.StringHeader)(unsafe.Pointer(&data))
+	h.Data = uintptr(unsafe.Pointer(&sh.data))
+	h.Len = uintptr(sh.size)
+	return
+}
diff --git a/go/arrow/internal/arrdata/arrdata.go b/go/arrow/internal/arrdata/arrdata.go
index 6631e4245c19d..985388094eb51 100644
--- a/go/arrow/internal/arrdata/arrdata.go
+++ b/go/arrow/internal/arrdata/arrdata.go
@@ -54,6 +54,7 @@ func init() {
 	Records["extension"] = makeExtensionRecords()
 	Records["union"] = makeUnionRecords()
 	Records["run_end_encoded"] = makeRunEndEncodedRecords()
+	Records["view_types"] = makeStringViewRecords()
 
 	for k := range Records {
 		RecordNames = append(RecordNames, k)
@@ -1155,6 +1156,65 @@ func makeRunEndEncodedRecords() []arrow.Record {
 	return recs
 }
 
+func makeStringViewRecords() []arrow.Record {
+	mem := memory.NewGoAllocator()
+	schema := arrow.NewSchema([]arrow.Field{
+		{Name: "binary_view", Type: arrow.BinaryTypes.BinaryView, Nullable: true},
+		{Name: "string_view", Type: arrow.BinaryTypes.StringView, Nullable: true},
+	}, nil)
+
+	mask := []bool{true, false, false, true, true}
+	chunks := [][]arrow.Array{
+		{
+			viewTypeArrayOf(mem, [][]byte{[]byte("1é"), []byte("2"), []byte("3"), []byte("4"), []byte("5")}, mask),
+			viewTypeArrayOf(mem, []string{"1é", "2", "3", "4", "5"}, mask),
+		},
+		{
+			viewTypeArrayOf(mem, [][]byte{[]byte("1é"), []byte("22222222222222"), []byte("33333333333333"), []byte("4444"), []byte("5555")}, mask),
+			viewTypeArrayOf(mem, []string{"1é", "22222222222222", "33333333333333", "4444", "5555"}, nil),
+		},
+		{
+			viewTypeArrayOf(mem, [][]byte{[]byte("1é1é"), []byte("22222222222222"), []byte("33333333333333"), []byte("44"), []byte("55")}, nil),
+			viewTypeArrayOf(mem, []string{"1é1é", "22222222222222", "33333333333333", "44", "55"}, mask),
+		},
+	}
+
+	defer func() {
+		for _, chunk := range chunks {
+			for _, col := range chunk {
+				col.Release()
+			}
+		}
+	}()
+
+	recs := make([]arrow.Record, len(chunks))
+	for i, chunk := range chunks {
+		recs[i] = array.NewRecord(schema, chunk, -1)
+	}
+
+	return recs
+}
+
+func viewTypeArrayOf(mem memory.Allocator, a interface{}, valids []bool) arrow.Array {
+	if mem == nil {
+		mem = memory.NewGoAllocator()
+	}
+
+	switch a := a.(type) {
+	case []string:
+		bldr := array.NewStringViewBuilder(mem)
+		defer bldr.Release()
+		bldr.AppendValues(a, valids)
+		return bldr.NewArray()
+	case [][]byte:
+		bldr := array.NewBinaryViewBuilder(mem)
+		defer bldr.Release()
+		bldr.AppendValues(a, valids)
+		return bldr.NewArray()
+	}
+	return nil
+}
+
 func extArray(mem memory.Allocator, dt arrow.ExtensionType, a interface{}, valids []bool) arrow.Array {
 	var storage arrow.Array
 	switch st := dt.StorageType().(type) {
@@ -1750,5 +1810,26 @@ func buildArray(bldr array.Builder, data arrow.Array) {
 				bldr.AppendNull()
 			}
 		}
+
+	case *array.BinaryViewBuilder:
+		data := data.(*array.BinaryView)
+		for i := 0; i < data.Len(); i++ {
+			switch {
+			case data.IsValid(i):
+				bldr.Append(data.Value(i))
+			default:
+				bldr.AppendNull()
+			}
+		}
+	case *array.StringViewBuilder:
+		data := data.(*array.StringView)
+		for i := 0; i < data.Len(); i++ {
+			switch {
+			case data.IsValid(i):
+				bldr.Append(data.Value(i))
+			default:
+				bldr.AppendNull()
+			}
+		}
 	}
 }
diff --git a/go/arrow/internal/arrjson/arrjson.go b/go/arrow/internal/arrjson/arrjson.go
index 87bdc1f44d875..f74b615362642 100644
--- a/go/arrow/internal/arrjson/arrjson.go
+++ b/go/arrow/internal/arrjson/arrjson.go
@@ -158,6 +158,10 @@ func typeToJSON(arrowType arrow.DataType) (json.RawMessage, error) {
 		typ = nameJSON{"utf8"}
 	case *arrow.LargeStringType:
 		typ = nameJSON{"largeutf8"}
+	case *arrow.BinaryViewType:
+		typ = nameJSON{"binaryview"}
+	case *arrow.StringViewType:
+		typ = nameJSON{"utf8view"}
 	case *arrow.Date32Type:
 		typ = unitZoneJSON{Name: "date", Unit: "DAY"}
 	case *arrow.Date64Type:
@@ -342,6 +346,10 @@ func typeFromJSON(typ json.RawMessage, children []FieldWrapper) (arrowType arrow
 		arrowType = arrow.BinaryTypes.String
 	case "largeutf8":
 		arrowType = arrow.BinaryTypes.LargeString
+	case "binaryview":
+		arrowType = arrow.BinaryTypes.BinaryView
+	case "utf8view":
+		arrowType = arrow.BinaryTypes.StringView
 	case "date":
 		t := unitZoneJSON{}
 		if err = json.Unmarshal(typ, &t); err != nil {
@@ -818,6 +826,7 @@ type Array struct {
 	Offset   interface{}           `json:"OFFSET,omitempty"`
 	Size     interface{}           `json:"SIZE,omitempty"`
 	Children []Array               `json:"children,omitempty"`
+	Variadic []string              `json:"VARIADIC_BUFFERS,omitempty"`
 }
 
 func (a *Array) MarshalJSON() ([]byte, error) {
@@ -1078,6 +1087,18 @@ func arrayFromJSON(mem memory.Allocator, dt arrow.DataType, arr Array) arrow.Arr
 		bldr.AppendValues(data, valids)
 		return returnNewArrayData(bldr)
 
+	case arrow.BinaryViewDataType:
+		valids := validsToBitmap(validsFromJSON(arr.Valids), mem)
+		nulls := arr.Count - bitutil.CountSetBits(valids.Bytes(), 0, arr.Count)
+		headers := stringHeadersFromJSON(mem, !dt.IsUtf8(), arr.Data)
+		extraBufs := variadicBuffersFromJSON(arr.Variadic)
+		defer valids.Release()
+		defer headers.Release()
+
+		return array.NewData(dt, arr.Count,
+			append([]*memory.Buffer{valids, headers}, extraBufs...),
+			nil, nulls, 0)
+
 	case *arrow.ListType:
 		valids := validsFromJSON(arr.Valids)
 		elems := arrayFromJSON(mem, dt.Elem(), arr.Children[0])
@@ -1486,6 +1507,24 @@ func arrayToJSON(field arrow.Field, arr arrow.Array) Array {
 			Offset: strOffsets,
 		}
 
+	case *array.StringView:
+		variadic := variadicBuffersToJSON(arr.Data().Buffers()[2:])
+		return Array{
+			Name:     field.Name,
+			Count:    arr.Len(),
+			Valids:   validsToJSON(arr),
+			Data:     stringHeadersToJSON(arr, false),
+			Variadic: variadic,
+		}
+	case *array.BinaryView:
+		variadic := variadicBuffersToJSON(arr.Data().Buffers()[2:])
+		return Array{
+			Name:     field.Name,
+			Count:    arr.Len(),
+			Valids:   validsToJSON(arr),
+			Data:     stringHeadersToJSON(arr, true),
+			Variadic: variadic,
+		}
 	case *array.List:
 		o := Array{
 			Name:   field.Name,
@@ -2309,3 +2348,114 @@ func durationToJSON(arr *array.Duration) []interface{} {
 	}
 	return o
 }
+
+func variadicBuffersFromJSON(bufs []string) []*memory.Buffer {
+	out := make([]*memory.Buffer, len(bufs))
+	for i, data := range bufs {
+		rawData, err := hex.DecodeString(data)
+		if err != nil {
+			panic(err)
+		}
+
+		out[i] = memory.NewBufferBytes(rawData)
+	}
+	return out
+}
+
+func variadicBuffersToJSON(bufs []*memory.Buffer) []string {
+	out := make([]string, len(bufs))
+	for i, data := range bufs {
+		out[i] = strings.ToUpper(hex.EncodeToString(data.Bytes()))
+	}
+	return out
+}
+
+func stringHeadersFromJSON(mem memory.Allocator, isBinary bool, data []interface{}) *memory.Buffer {
+	buf := memory.NewResizableBuffer(mem)
+	buf.Resize(arrow.ViewHeaderTraits.BytesRequired(len(data)))
+
+	values := arrow.ViewHeaderTraits.CastFromBytes(buf.Bytes())
+
+	for i, d := range data {
+		switch v := d.(type) {
+		case nil:
+			continue
+		case map[string]interface{}:
+			if inlined, ok := v["INLINED"]; ok {
+				if isBinary {
+					val, err := hex.DecodeString(inlined.(string))
+					if err != nil {
+						panic(fmt.Errorf("could not decode %v: %v", inlined, err))
+					}
+					values[i].SetBytes(val)
+				} else {
+					values[i].SetString(inlined.(string))
+				}
+				continue
+			}
+
+			idx, offset := v["BUFFER_INDEX"].(json.Number), v["OFFSET"].(json.Number)
+			bufIdx, err := idx.Int64()
+			if err != nil {
+				panic(err)
+			}
+
+			bufOffset, err := offset.Int64()
+			if err != nil {
+				panic(err)
+			}
+
+			values[i].SetIndexOffset(int32(bufIdx), int32(bufOffset))
+			prefix, err := hex.DecodeString(v["PREFIX"].(string))
+			if err != nil {
+				panic(err)
+			}
+			sz, err := v["SIZE"].(json.Number).Int64()
+			if err != nil {
+				panic(err)
+			}
+
+			rawData := make([]byte, sz)
+			copy(rawData, prefix)
+			values[i].SetBytes(rawData)
+		}
+	}
+	return buf
+}
+
+func stringHeadersToJSON(arr array.ViewLike, isBinary bool) []interface{} {
+	type StringHeader struct {
+		Size      int     `json:"SIZE"`
+		Prefix    *string `json:"PREFIX,omitempty"`
+		BufferIdx *int    `json:"BUFFER_INDEX,omitempty"`
+		BufferOff *int    `json:"OFFSET,omitempty"`
+		Inlined   *string `json:"INLINED,omitempty"`
+	}
+
+	o := make([]interface{}, arr.Len())
+	for i := range o {
+		hdr := arr.ValueHeader(i)
+		if hdr.IsInline() {
+			data := hdr.InlineString()
+			if isBinary {
+				data = strings.ToUpper(hex.EncodeToString(hdr.InlineBytes()))
+			}
+			o[i] = StringHeader{
+				Size:    hdr.Len(),
+				Inlined: &data,
+			}
+			continue
+		}
+
+		idx, off := int(hdr.BufferIndex()), int(hdr.BufferOffset())
+		prefix := hdr.Prefix()
+		encodedPrefix := strings.ToUpper(hex.EncodeToString(prefix[:]))
+		o[i] = StringHeader{
+			Size:      hdr.Len(),
+			Prefix:    &encodedPrefix,
+			BufferIdx: &idx,
+			BufferOff: &off,
+		}
+	}
+	return o
+}
diff --git a/go/arrow/internal/arrjson/arrjson_test.go b/go/arrow/internal/arrjson/arrjson_test.go
index 7beadee370edb..31f3cb238ec16 100644
--- a/go/arrow/internal/arrjson/arrjson_test.go
+++ b/go/arrow/internal/arrjson/arrjson_test.go
@@ -48,6 +48,7 @@ func TestReadWrite(t *testing.T) {
 	wantJSONs["dictionary"] = makeDictionaryWantJSONs()
 	wantJSONs["union"] = makeUnionWantJSONs()
 	wantJSONs["run_end_encoded"] = makeRunEndEncodedWantJSONs()
+	wantJSONs["view_types"] = makeViewTypesWantJSONs()
 	tempDir := t.TempDir()
 
 	for name, recs := range arrdata.Records {
@@ -6127,3 +6128,261 @@ func makeRunEndEncodedWantJSONs() string {
   ]
 }`
 }
+
+func makeViewTypesWantJSONs() string {
+	return `{
+  "schema": {
+    "fields": [
+      {
+        "name": "binary_view",
+        "type": {
+          "name": "binaryview"
+        },
+        "nullable": true,
+        "children": []
+      },
+      {
+        "name": "string_view",
+        "type": {
+          "name": "utf8view"
+        },
+        "nullable": true,
+        "children": []
+      }
+    ]
+  },
+  "batches": [
+    {
+      "count": 5,
+      "columns": [
+        {
+          "name": "binary_view",
+          "count": 5,
+          "VALIDITY": [
+            1,
+            0,
+            0,
+            1,
+            1
+          ],
+          "DATA": [
+            {
+              "SIZE": 3,
+              "INLINED": "31C3A9"
+            },
+            {
+              "SIZE": 0,
+              "INLINED": ""
+            },
+            {
+              "SIZE": 0,
+              "INLINED": ""
+            },
+            {
+              "SIZE": 1,
+              "INLINED": "34"
+            },
+            {
+              "SIZE": 1,
+              "INLINED": "35"
+            }
+          ],
+          "VARIADIC_BUFFERS": [""]
+        },
+        {
+          "name": "string_view",
+          "count": 5,
+          "VALIDITY": [
+            1,
+            0,
+            0,
+            1,
+            1
+          ],
+          "DATA": [
+            {
+              "SIZE": 3,
+              "INLINED": "1é" 
+            },
+            {
+              "SIZE": 0,
+              "INLINED": ""
+            },
+            {
+              "SIZE": 0,
+              "INLINED": ""
+            },
+            {
+              "SIZE": 1,
+              "INLINED": "4"
+            },
+            {
+              "SIZE": 1,
+              "INLINED": "5"
+            }
+          ],
+          "VARIADIC_BUFFERS": [""]
+        }
+      ]
+    },
+    {
+      "count": 5,
+      "columns": [
+        {
+          "name": "binary_view",
+          "count": 5,
+          "VALIDITY": [
+            1,
+            0,
+            0,
+            1,
+            1
+          ],
+          "DATA": [
+            {
+              "SIZE": 3,
+              "INLINED": "31C3A9"
+            },
+            {
+              "SIZE": 0,
+              "INLINED": ""
+            },
+            {
+              "SIZE": 0,
+              "INLINED": ""
+            },
+            {
+              "SIZE": 4,
+              "INLINED": "34343434"
+            },
+            {
+              "SIZE": 4,
+              "INLINED": "35353535"
+            }
+          ],
+          "VARIADIC_BUFFERS": [""]
+        },
+        {
+          "name": "string_view",
+          "count": 5,
+          "VALIDITY": [
+            1,
+            1,
+            1,
+            1,
+            1
+          ],
+          "DATA": [
+            {
+              "SIZE": 3,
+              "INLINED": "1é"              
+            },
+            {
+              "SIZE": 14,
+              "PREFIX": "32323232",
+              "BUFFER_INDEX": 0,
+              "OFFSET": 0
+            },
+            {
+              "SIZE": 14,
+              "PREFIX": "33333333",
+              "BUFFER_INDEX": 0,
+              "OFFSET": 14
+            },
+            {
+              "SIZE": 4,
+              "INLINED": "4444"
+            },
+            {
+              "SIZE": 4,
+              "INLINED": "5555"
+            }
+          ],
+          "VARIADIC_BUFFERS": [
+            "32323232323232323232323232323333333333333333333333333333"
+          ]
+        }
+      ]
+    },
+    {
+      "count": 5,
+      "columns": [
+        {
+          "name": "binary_view",
+          "count": 5,
+          "VALIDITY": [
+            1,
+            1,
+            1,
+            1,
+            1
+          ],
+          "DATA": [
+            {
+              "SIZE": 6,
+              "INLINED": "31C3A931C3A9"
+            },
+            {
+              "SIZE": 14,
+              "PREFIX": "32323232",
+              "BUFFER_INDEX": 0,
+              "OFFSET": 0
+            },
+            {
+              "SIZE": 14,
+              "PREFIX": "33333333",
+              "BUFFER_INDEX": 0,
+              "OFFSET": 14
+            },
+            {
+              "SIZE": 2,
+              "INLINED": "3434"
+            },
+            {
+              "SIZE": 2,
+              "INLINED": "3535"
+            }
+          ],
+          "VARIADIC_BUFFERS": [
+            "32323232323232323232323232323333333333333333333333333333"
+          ]
+        },
+        {
+          "name": "string_view",
+          "count": 5,
+          "VALIDITY": [
+            1,
+            0,
+            0,
+            1,
+            1
+          ],
+          "DATA": [
+            {
+              "SIZE": 6,
+              "INLINED": "1é1é"
+            },
+            {
+              "SIZE": 0,
+              "INLINED": ""
+            },
+            {
+              "SIZE": 0,
+              "INLINED": ""
+            },
+            {
+              "SIZE": 2,
+              "INLINED": "44"
+            },
+            {
+              "SIZE": 2,
+              "INLINED": "55"
+            }
+          ],
+          "VARIADIC_BUFFERS": [""]
+        }
+      ]
+    }
+  ]
+}`
+}
diff --git a/go/arrow/internal/flatbuf/MetadataVersion.go b/go/arrow/internal/flatbuf/MetadataVersion.go
index 21b234f9c2b21..bb5e99dd588ad 100644
--- a/go/arrow/internal/flatbuf/MetadataVersion.go
+++ b/go/arrow/internal/flatbuf/MetadataVersion.go
@@ -31,7 +31,7 @@ const (
 	MetadataVersionV3 MetadataVersion = 2
 	/// >= 0.8.0 (December 2017). Non-backwards compatible with V3.
 	MetadataVersionV4 MetadataVersion = 3
-	/// >= 1.0.0 (July 2020. Backwards compatible with V4 (V5 readers can read V4
+	/// >= 1.0.0 (July 2020). Backwards compatible with V4 (V5 readers can read V4
 	/// metadata and IPC messages). Implementations are recommended to provide a
 	/// V4 compatibility mode with V5 format changes disabled.
 	///
diff --git a/go/arrow/internal/testing/gen/random_array_gen.go b/go/arrow/internal/testing/gen/random_array_gen.go
index b42273ff93fac..57b417bd2b878 100644
--- a/go/arrow/internal/testing/gen/random_array_gen.go
+++ b/go/arrow/internal/testing/gen/random_array_gen.go
@@ -351,6 +351,40 @@ func (r *RandomArrayGenerator) LargeString(size int64, minLength, maxLength int6
 	return bldr.NewArray()
 }
 
+func (r *RandomArrayGenerator) StringView(size int64, minLength, maxLength int64, nullProb float64) arrow.Array {
+	return r.generateBinaryView(arrow.BinaryTypes.StringView, size, minLength, maxLength, nullProb)
+}
+
+func (r *RandomArrayGenerator) generateBinaryView(dt arrow.DataType, size int64, minLength, maxLength int64, nullProb float64) arrow.Array {
+	lengths := r.Int32(size, int32(minLength), int32(maxLength), nullProb).(*array.Int32)
+	defer lengths.Release()
+
+	bldr := array.NewBuilder(r.mem, dt).(array.StringLikeBuilder)
+	defer bldr.Release()
+
+	r.extra++
+	dist := rand.New(rand.NewSource(r.seed + r.extra))
+
+	buf := make([]byte, 0, maxLength)
+	gen := func(n int32) string {
+		out := buf[:n]
+		for i := range out {
+			out[i] = uint8(dist.Int31n(int32('z')-int32('A')+1) + int32('A'))
+		}
+		return string(out)
+	}
+
+	for i := 0; i < lengths.Len(); i++ {
+		if lengths.IsNull(i) {
+			bldr.AppendNull()
+			continue
+		}
+		bldr.Append(gen(lengths.Value(i)))
+	}
+
+	return bldr.NewArray()
+}
+
 func (r *RandomArrayGenerator) Numeric(dt arrow.Type, size int64, min, max int64, nullprob float64) arrow.Array {
 	switch dt {
 	case arrow.INT8:
diff --git a/go/arrow/ipc/endian_swap.go b/go/arrow/ipc/endian_swap.go
index d2e0948434abc..35ba0e4e764f9 100644
--- a/go/arrow/ipc/endian_swap.go
+++ b/go/arrow/ipc/endian_swap.go
@@ -18,6 +18,7 @@ package ipc
 
 import (
 	"errors"
+	"fmt"
 	"math/bits"
 
 	"github.com/apache/arrow/go/v15/arrow"
@@ -119,7 +120,10 @@ func swapType(dt arrow.DataType, data *array.Data) (err error) {
 		return swapType(dt.IndexType, data)
 	case arrow.FixedWidthDataType:
 		byteSwapBuffer(dt.BitWidth(), data.Buffers()[1])
+	default:
+		err = fmt.Errorf("%w: swapping endianness of %s", arrow.ErrNotImplemented, dt)
 	}
+
 	return
 }
 
diff --git a/go/arrow/ipc/file_reader.go b/go/arrow/ipc/file_reader.go
index 330355d3a60c3..1c7eb31799cfa 100644
--- a/go/arrow/ipc/file_reader.go
+++ b/go/arrow/ipc/file_reader.go
@@ -430,13 +430,18 @@ func (src *ipcSource) fieldMetadata(i int) *flatbuf.FieldNode {
 	return &node
 }
 
+func (src *ipcSource) variadicCount(i int) int64 {
+	return src.meta.VariadicBufferCounts(i)
+}
+
 type arrayLoaderContext struct {
-	src     ipcSource
-	ifield  int
-	ibuffer int
-	max     int
-	memo    *dictutils.Memo
-	version MetadataVersion
+	src       ipcSource
+	ifield    int
+	ibuffer   int
+	ivariadic int
+	max       int
+	memo      *dictutils.Memo
+	version   MetadataVersion
 }
 
 func (ctx *arrayLoaderContext) field() *flatbuf.FieldNode {
@@ -451,6 +456,12 @@ func (ctx *arrayLoaderContext) buffer() *memory.Buffer {
 	return buf
 }
 
+func (ctx *arrayLoaderContext) variadic() int64 {
+	v := ctx.src.variadicCount(ctx.ivariadic)
+	ctx.ivariadic++
+	return v
+}
+
 func (ctx *arrayLoaderContext) loadArray(dt arrow.DataType) arrow.ArrayData {
 	switch dt := dt.(type) {
 	case *arrow.NullType:
@@ -476,6 +487,9 @@ func (ctx *arrayLoaderContext) loadArray(dt arrow.DataType) arrow.ArrayData {
 	case *arrow.BinaryType, *arrow.StringType, *arrow.LargeStringType, *arrow.LargeBinaryType:
 		return ctx.loadBinary(dt)
 
+	case arrow.BinaryViewDataType:
+		return ctx.loadBinaryView(dt)
+
 	case *arrow.FixedSizeBinaryType:
 		return ctx.loadFixedSizeBinary(dt)
 
@@ -582,6 +596,18 @@ func (ctx *arrayLoaderContext) loadBinary(dt arrow.DataType) arrow.ArrayData {
 	return array.NewData(dt, int(field.Length()), buffers, nil, int(field.NullCount()), 0)
 }
 
+func (ctx *arrayLoaderContext) loadBinaryView(dt arrow.DataType) arrow.ArrayData {
+	nVariadicBufs := ctx.variadic()
+	field, buffers := ctx.loadCommon(dt.ID(), 2+int(nVariadicBufs))
+	buffers = append(buffers, ctx.buffer())
+	for i := 0; i < int(nVariadicBufs); i++ {
+		buffers = append(buffers, ctx.buffer())
+	}
+	defer releaseBuffers(buffers)
+
+	return array.NewData(dt, int(field.Length()), buffers, nil, int(field.NullCount()), 0)
+}
+
 func (ctx *arrayLoaderContext) loadFixedSizeBinary(dt *arrow.FixedSizeBinaryType) arrow.ArrayData {
 	field, buffers := ctx.loadCommon(dt.ID(), 2)
 	buffers = append(buffers, ctx.buffer())
diff --git a/go/arrow/ipc/message.go b/go/arrow/ipc/message.go
index 709aa5aa2dba4..5295c5df30137 100644
--- a/go/arrow/ipc/message.go
+++ b/go/arrow/ipc/message.go
@@ -31,11 +31,11 @@ import (
 type MetadataVersion flatbuf.MetadataVersion
 
 const (
-	MetadataV1 = MetadataVersion(flatbuf.MetadataVersionV1) // version for Arrow-0.1.0
-	MetadataV2 = MetadataVersion(flatbuf.MetadataVersionV2) // version for Arrow-0.2.0
-	MetadataV3 = MetadataVersion(flatbuf.MetadataVersionV3) // version for Arrow-0.3.0 to 0.7.1
-	MetadataV4 = MetadataVersion(flatbuf.MetadataVersionV4) // version for >= Arrow-0.8.0
-	MetadataV5 = MetadataVersion(flatbuf.MetadataVersionV5) // version for >= Arrow-1.0.0, backward compatible with v4
+	MetadataV1 = MetadataVersion(flatbuf.MetadataVersionV1) // version for Arrow Format-0.1.0
+	MetadataV2 = MetadataVersion(flatbuf.MetadataVersionV2) // version for Arrow Format-0.2.0
+	MetadataV3 = MetadataVersion(flatbuf.MetadataVersionV3) // version for Arrow Format-0.3.0 to 0.7.1
+	MetadataV4 = MetadataVersion(flatbuf.MetadataVersionV4) // version for >= Arrow Format-0.8.0
+	MetadataV5 = MetadataVersion(flatbuf.MetadataVersionV5) // version for >= Arrow Format-1.0.0, backward compatible with v4
 )
 
 func (m MetadataVersion) String() string {
diff --git a/go/arrow/ipc/metadata.go b/go/arrow/ipc/metadata.go
index bd437834c3d06..54ef58753a173 100644
--- a/go/arrow/ipc/metadata.go
+++ b/go/arrow/ipc/metadata.go
@@ -323,6 +323,16 @@ func (fv *fieldVisitor) visit(field arrow.Field) {
 		flatbuf.LargeUtf8Start(fv.b)
 		fv.offset = flatbuf.LargeUtf8End(fv.b)
 
+	case *arrow.BinaryViewType:
+		fv.dtype = flatbuf.TypeBinaryView
+		flatbuf.BinaryViewStart(fv.b)
+		fv.offset = flatbuf.BinaryViewEnd(fv.b)
+
+	case *arrow.StringViewType:
+		fv.dtype = flatbuf.TypeUtf8View
+		flatbuf.Utf8ViewStart(fv.b)
+		fv.offset = flatbuf.Utf8ViewEnd(fv.b)
+
 	case *arrow.Date32Type:
 		fv.dtype = flatbuf.TypeDate
 		flatbuf.DateStart(fv.b)
@@ -713,6 +723,12 @@ func concreteTypeFromFB(typ flatbuf.Type, data flatbuffers.Table, children []arr
 	case flatbuf.TypeLargeUtf8:
 		return arrow.BinaryTypes.LargeString, nil
 
+	case flatbuf.TypeUtf8View:
+		return arrow.BinaryTypes.StringView, nil
+
+	case flatbuf.TypeBinaryView:
+		return arrow.BinaryTypes.BinaryView, nil
+
 	case flatbuf.TypeBool:
 		return arrow.FixedWidthTypes.Boolean, nil
 
@@ -1168,15 +1184,15 @@ func writeFileFooter(schema *arrow.Schema, dicts, recs []fileBlock, w io.Writer)
 	return err
 }
 
-func writeRecordMessage(mem memory.Allocator, size, bodyLength int64, fields []fieldMetadata, meta []bufferMetadata, codec flatbuf.CompressionType) *memory.Buffer {
+func writeRecordMessage(mem memory.Allocator, size, bodyLength int64, fields []fieldMetadata, meta []bufferMetadata, codec flatbuf.CompressionType, variadicCounts []int64) *memory.Buffer {
 	b := flatbuffers.NewBuilder(0)
-	recFB := recordToFB(b, size, bodyLength, fields, meta, codec)
+	recFB := recordToFB(b, size, bodyLength, fields, meta, codec, variadicCounts)
 	return writeMessageFB(b, mem, flatbuf.MessageHeaderRecordBatch, recFB, bodyLength)
 }
 
-func writeDictionaryMessage(mem memory.Allocator, id int64, isDelta bool, size, bodyLength int64, fields []fieldMetadata, meta []bufferMetadata, codec flatbuf.CompressionType) *memory.Buffer {
+func writeDictionaryMessage(mem memory.Allocator, id int64, isDelta bool, size, bodyLength int64, fields []fieldMetadata, meta []bufferMetadata, codec flatbuf.CompressionType, variadicCounts []int64) *memory.Buffer {
 	b := flatbuffers.NewBuilder(0)
-	recFB := recordToFB(b, size, bodyLength, fields, meta, codec)
+	recFB := recordToFB(b, size, bodyLength, fields, meta, codec, variadicCounts)
 
 	flatbuf.DictionaryBatchStart(b)
 	flatbuf.DictionaryBatchAddId(b, id)
@@ -1186,7 +1202,7 @@ func writeDictionaryMessage(mem memory.Allocator, id int64, isDelta bool, size,
 	return writeMessageFB(b, mem, flatbuf.MessageHeaderDictionaryBatch, dictFB, bodyLength)
 }
 
-func recordToFB(b *flatbuffers.Builder, size, bodyLength int64, fields []fieldMetadata, meta []bufferMetadata, codec flatbuf.CompressionType) flatbuffers.UOffsetT {
+func recordToFB(b *flatbuffers.Builder, size, bodyLength int64, fields []fieldMetadata, meta []bufferMetadata, codec flatbuf.CompressionType, variadicCounts []int64) flatbuffers.UOffsetT {
 	fieldsFB := writeFieldNodes(b, fields, flatbuf.RecordBatchStartNodesVector)
 	metaFB := writeBuffers(b, meta, flatbuf.RecordBatchStartBuffersVector)
 	var bodyCompressFB flatbuffers.UOffsetT
@@ -1194,10 +1210,24 @@ func recordToFB(b *flatbuffers.Builder, size, bodyLength int64, fields []fieldMe
 		bodyCompressFB = writeBodyCompression(b, codec)
 	}
 
+	var vcFB *flatbuffers.UOffsetT
+	if len(variadicCounts) > 0 {
+		flatbuf.RecordBatchStartVariadicBufferCountsVector(b, len(variadicCounts))
+		for i := len(variadicCounts) - 1; i >= 0; i-- {
+			b.PrependInt64(variadicCounts[i])
+		}
+		vcFBVal := b.EndVector(len(variadicCounts))
+		vcFB = &vcFBVal
+	}
+
 	flatbuf.RecordBatchStart(b)
 	flatbuf.RecordBatchAddLength(b, size)
 	flatbuf.RecordBatchAddNodes(b, fieldsFB)
 	flatbuf.RecordBatchAddBuffers(b, metaFB)
+	if vcFB != nil {
+		flatbuf.RecordBatchAddVariadicBufferCounts(b, *vcFB)
+	}
+
 	if codec != -1 {
 		flatbuf.RecordBatchAddCompression(b, bodyCompressFB)
 	}
diff --git a/go/arrow/ipc/writer.go b/go/arrow/ipc/writer.go
index 58c56d2d16ccf..e9d59f0e35e00 100644
--- a/go/arrow/ipc/writer.go
+++ b/go/arrow/ipc/writer.go
@@ -277,7 +277,7 @@ type dictEncoder struct {
 }
 
 func (d *dictEncoder) encodeMetadata(p *Payload, isDelta bool, id, nrows int64) error {
-	p.meta = writeDictionaryMessage(d.mem, id, isDelta, nrows, p.size, d.fields, d.meta, d.codec)
+	p.meta = writeDictionaryMessage(d.mem, id, isDelta, nrows, p.size, d.fields, d.meta, d.codec, d.variadicCounts)
 	return nil
 }
 
@@ -300,8 +300,9 @@ func (d *dictEncoder) Encode(p *Payload, id int64, isDelta bool, dict arrow.Arra
 type recordEncoder struct {
 	mem memory.Allocator
 
-	fields []fieldMetadata
-	meta   []bufferMetadata
+	fields         []fieldMetadata
+	meta           []bufferMetadata
+	variadicCounts []int64
 
 	depth           int64
 	start           int64
@@ -602,6 +603,33 @@ func (w *recordEncoder) visit(p *Payload, arr arrow.Array) error {
 		p.body = append(p.body, voffsets)
 		p.body = append(p.body, values)
 
+	case arrow.BinaryViewDataType:
+		data := arr.Data()
+		values := data.Buffers()[1]
+		arrLen := int64(arr.Len())
+		typeWidth := int64(arrow.ViewHeaderSizeBytes)
+		minLength := paddedLength(arrLen*typeWidth, kArrowAlignment)
+
+		switch {
+		case needTruncate(int64(data.Offset()), values, minLength):
+			// non-zero offset: slice the buffer
+			offset := data.Offset() * int(typeWidth)
+			// send padding if available
+			len := int(minI64(bitutil.CeilByte64(arrLen*typeWidth), int64(values.Len()-offset)))
+			values = memory.SliceBuffer(values, offset, len)
+		default:
+			if values != nil {
+				values.Retain()
+			}
+		}
+		p.body = append(p.body, values)
+
+		w.variadicCounts = append(w.variadicCounts, int64(len(data.Buffers())-2))
+		for _, b := range data.Buffers()[2:] {
+			b.Retain()
+			p.body = append(p.body, b)
+		}
+
 	case *arrow.StructType:
 		w.depth--
 		arr := arr.(*array.Struct)
@@ -946,7 +974,7 @@ func (w *recordEncoder) Encode(p *Payload, rec arrow.Record) error {
 }
 
 func (w *recordEncoder) encodeMetadata(p *Payload, nrows int64) error {
-	p.meta = writeRecordMessage(w.mem, nrows, p.size, w.fields, w.meta, w.codec)
+	p.meta = writeRecordMessage(w.mem, nrows, p.size, w.fields, w.meta, w.codec, w.variadicCounts)
 	return nil
 }
 
diff --git a/go/arrow/type_traits_view.go b/go/arrow/type_traits_view.go
new file mode 100644
index 0000000000000..c3846db294681
--- /dev/null
+++ b/go/arrow/type_traits_view.go
@@ -0,0 +1,53 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package arrow
+
+import (
+	"reflect"
+	"unsafe"
+
+	"github.com/apache/arrow/go/v15/arrow/endian"
+)
+
+var ViewHeaderTraits viewHeaderTraits
+
+const (
+	ViewHeaderSizeBytes = int(unsafe.Sizeof(ViewHeader{}))
+)
+
+type viewHeaderTraits struct{}
+
+func (viewHeaderTraits) BytesRequired(n int) int { return ViewHeaderSizeBytes * n }
+
+func (viewHeaderTraits) PutValue(b []byte, v ViewHeader) {
+	endian.Native.PutUint32(b, uint32(v.size))
+	copy(b[4:], v.data[:])
+}
+
+func (viewHeaderTraits) CastFromBytes(b []byte) (res []ViewHeader) {
+	h := (*reflect.SliceHeader)(unsafe.Pointer(&b))
+
+	return unsafe.Slice((*ViewHeader)(unsafe.Pointer(h.Data)), cap(b)/ViewHeaderSizeBytes)[:len(b)/ViewHeaderSizeBytes]
+}
+
+func (viewHeaderTraits) CastToBytes(b []ViewHeader) (res []byte) {
+	h := (*reflect.SliceHeader)(unsafe.Pointer(&b))
+
+	return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*ViewHeaderSizeBytes)[:len(b)*ViewHeaderSizeBytes]
+}
+
+func (viewHeaderTraits) Copy(dst, src []ViewHeader) { copy(dst, src) }

From a886fdaa2d80a2e7f56cf8a3cf94b367443b6e8e Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@fishandwhistle.net>
Date: Tue, 14 Nov 2023 22:19:48 -0400
Subject: [PATCH 07/23] GH-38715: [R] Fix possible bashism in configure script
 (#38716)

### Rationale for this change

The CRAN incoming check for 14.0.0 is failing with a NOTE about a possible bashism.

### What changes are included in this PR?

One `test -a` usage was replaced with `&&`.

### Are these changes tested?

Yes (via crossbow, below)

### Are there any user-facing changes?

No
* Closes: #38715

Authored-by: Dewey Dunnington <dewey@fishandwhistle.net>
Signed-off-by: Dewey Dunnington <dewey@fishandwhistle.net>
---
 r/configure | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/r/configure b/r/configure
index 5a7f0c81a947b..4f09cfdc4419b 100755
--- a/r/configure
+++ b/r/configure
@@ -126,7 +126,7 @@ fi
 # but doing it now allows us to catch it in
 # nixlibs.R and activate S3 and GCS support for the source build.
 
-# macOS ships with libressl. openssl is installable with brew, but it is 
+# macOS ships with libressl. openssl is installable with brew, but it is
 # generally not linked. We can over-ride this and find
 # openssl by setting OPENSSL_ROOT_DIR (which cmake will pick up later in
 # the installation process).
@@ -135,7 +135,7 @@ if [ "${OPENSSL_ROOT_DIR}" = "" ] && brew --prefix openssl >/dev/null 2>&1; then
   export PKG_CONFIG_PATH="${OPENSSL_ROOT_DIR}/lib/pkgconfig${PKG_CONFIG_PATH:+:${PKG_CONFIG_PATH}}"
 fi
 # Look for openssl with pkg-config for non-brew sources(e.g. CRAN) and Linux
-if [ "${OPENSSL_ROOT_DIR}" = "" -a "${PKG_CONFIG_AVAILABLE}" = "true" ]; then
+if [ "${OPENSSL_ROOT_DIR}" = "" ] && [ "${PKG_CONFIG_AVAILABLE}" = "true" ]; then
   if ${PKG_CONFIG} --exists openssl; then
     export OPENSSL_ROOT_DIR="`${PKG_CONFIG} --variable=prefix openssl`"
   fi
@@ -282,7 +282,7 @@ set_pkg_vars () {
     PKG_CFLAGS="$PKG_CFLAGS $ARROW_R_CXXFLAGS"
   fi
 
-  # We use expr because the product version returns more than just 10.13 and we want to 
+  # We use expr because the product version returns more than just 10.13 and we want to
   # match the substring. However, expr always outputs the number of matched characters
   # to stdout, to avoid noise in the log we redirect the output to /dev/null
   if [ "$UNAME" = "Darwin" ] && expr $(sw_vers -productVersion) : '10\.13' >/dev/null 2>&1; then

From e49d8ae15583ceff03237571569099a6ad62be32 Mon Sep 17 00:00:00 2001
From: Hyunseok Seo <hsseo0501@gmail.com>
Date: Wed, 15 Nov 2023 14:17:42 +0900
Subject: [PATCH 08/23] GH-38711: [CI] Rollback aws-cli for preview
 documentation (#38723)

### Rationale for this change

The free-space script no longer deletes `aws-cli` from the runner image, because the `preview-docs` command needs it to run.

### Are these changes tested?

No

### Are there any user-facing changes?

No

* Closes: #38711

Authored-by: Hyunseok Seo <hsseo0501@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
---
 ci/scripts/util_free_space.sh | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/ci/scripts/util_free_space.sh b/ci/scripts/util_free_space.sh
index 0518869d06993..dd6ba2c4600a9 100755
--- a/ci/scripts/util_free_space.sh
+++ b/ci/scripts/util_free_space.sh
@@ -25,7 +25,6 @@ du -hsc /usr/local/*
 echo "::endgroup::"
 # ~1GB
 sudo rm -rf \
-  /usr/local/aws-cli \
   /usr/local/aws-sam-cil \
   /usr/local/julia* || :
 echo "::group::/usr/local/bin/*"
@@ -34,8 +33,6 @@ echo "::endgroup::"
 # ~1GB (From 1.2GB to 214MB)
 sudo rm -rf \
   /usr/local/bin/aliyun \
-  /usr/local/bin/aws \
-  /usr/local/bin/aws_completer \
   /usr/local/bin/azcopy \
   /usr/local/bin/bicep \
   /usr/local/bin/cmake-gui \

From 5b17b8402e0444f1a9b9ce1fb4dc2b7b92e9aede Mon Sep 17 00:00:00 2001
From: Alenka Frim <AlenkaF@users.noreply.github.com>
Date: Wed, 15 Nov 2023 09:21:22 +0100
Subject: [PATCH 09/23] GH-38712: [Python] Remove dead code in
 _reconstruct_block (#38714)

### Rationale for this change

It seems the object case in `_reconstruct_block` is dead code that is no longer needed and can therefore be removed.

### What changes are included in this PR?

Removal of the object case in `_reconstruct_block`. I also looked through the `arrow_to_pandas.cc` code to see whether any dead code is present there and could not find any.

### Are these changes tested?

The change in this PR should not make any of the existing tests fail.

### Are there any user-facing changes?

There shouldn't be.
* Closes: #38712

Authored-by: AlenkaF <frim.alenka@gmail.com>
Signed-off-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 python/pyarrow/pandas_compat.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py
index e232603ba45ac..be29f68a13d5f 100644
--- a/python/pyarrow/pandas_compat.py
+++ b/python/pyarrow/pandas_compat.py
@@ -26,7 +26,6 @@
 from itertools import zip_longest
 import json
 import operator
-import pickle
 import re
 import warnings
 
@@ -721,9 +720,6 @@ def _reconstruct_block(item, columns=None, extension_columns=None):
         block = _int.make_block(block_arr, placement=placement,
                                 klass=_int.DatetimeTZBlock,
                                 dtype=dtype)
-    elif 'object' in item:
-        block = _int.make_block(pickle.loads(block_arr),
-                                placement=placement)
     elif 'py_array' in item:
         # create ExtensionBlock
         arr = item['py_array']

From cc627ee7e35807a98717603d0a2520685919e17c Mon Sep 17 00:00:00 2001
From: Hyunseok Seo <hsseo0501@gmail.com>
Date: Wed, 15 Nov 2023 19:01:43 +0900
Subject: [PATCH 10/23] GH-38599: [Docs] Update Headers (#38696)

### Rationale for this change

I noticed wrong section headings in the web documentation; this PR proposes a fix.

### Are these changes tested?

Yes. Built and verified the documentation locally.

<img width="1049" alt="image" src="https://github.com/apache/arrow/assets/6668548/793d6222-2bc5-4f55-ab67-5abe5283add4">

### Are there any user-facing changes?

No.
* Closes: #38599

Authored-by: Hyunseok Seo <hsseo0501@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 docs/source/format/CDeviceDataInterface.rst | 29 ++++++++++-----------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/docs/source/format/CDeviceDataInterface.rst b/docs/source/format/CDeviceDataInterface.rst
index b54e6eabe0b2d..a584852df87eb 100644
--- a/docs/source/format/CDeviceDataInterface.rst
+++ b/docs/source/format/CDeviceDataInterface.rst
@@ -277,7 +277,7 @@ has the following fields:
     to access the memory in the buffers.
 
     If an event is provided, then the producer MUST ensure that the exported
-    data is available on the device before the event is triggered. The 
+    data is available on the device before the event is triggered. The
     consumer SHOULD wait on the event before trying to access the exported
     data.
 
@@ -290,7 +290,7 @@ has the following fields:
     As non-CPU development expands, there may be a need to expand this
     structure. In order to do so without potentially breaking ABI changes,
     we reserve 24 bytes at the end of the object. These bytes MUST be zero'd
-    out after initialization by the producer in order to ensure safe 
+    out after initialization by the producer in order to ensure safe
     evolution of the ABI in the future.
 
 .. _c-device-data-interface-event-types:
@@ -300,7 +300,7 @@ Synchronization event types
 
 The table below lists the expected event types for each device type.
 If no event type is supported ("N/A"), then the ``sync_event`` member
-should always be null. 
+should always be null.
 
 Remember that the event *CAN* be null if synchronization is not needed
 to access the data.
@@ -352,7 +352,7 @@ Memory management
 -----------------
 
 First and foremost: Out of everything in this interface, it is *only* the
-data buffers themselves which reside in device memory (i.e. the ``buffers`` 
+data buffers themselves which reside in device memory (i.e. the ``buffers``
 member of the ``ArrowArray`` struct). Everything else should be in CPU
 memory.
 
@@ -408,7 +408,7 @@ see inconsistent data while the other is mutating it.
 Synchronization
 ---------------
 
-If the ``sync_event`` member is non-NULL, the consumer should not attempt 
+If the ``sync_event`` member is non-NULL, the consumer should not attempt
 to access or read the data until they have synchronized on that event. If
 the ``sync_event`` member is NULL, then it MUST be safe to access the data
 without any synchronization necessary on the part of the consumer.
@@ -501,7 +501,6 @@ could be used for any device:
         arr->array.release(&arr->array);
     }
 
-=======================
 Device Stream Interface
 =======================
 
@@ -510,7 +509,7 @@ interface also specifies a higher-level structure for easing communication
 of streaming data within a single process.
 
 Semantics
-=========
+---------
 
 An Arrow C device stream exposes a streaming source of data chunks, each with
 the same schema. Chunks are obtained by calling a blocking pull-style iteration
@@ -520,7 +519,7 @@ to provide a stream of data on multiple device types, a producer should
 provide a separate stream object for each device type.
 
 Structure definition
-====================
+--------------------
 
 The C device stream interface is defined by a single ``struct`` definition:
 
@@ -554,7 +553,7 @@ The C device stream interface is defined by a single ``struct`` definition:
     kept exactly as-is when these definitions are copied.
 
 The ArrowDeviceArrayStream structure
-------------------------------------
+''''''''''''''''''''''''''''''''''''
 
 The ``ArrowDeviceArrayStream`` provides a device type that can access the
 resulting data along with the required callbacks to interact with a
@@ -627,20 +626,20 @@ streaming source of Arrow arrays. It has the following fields:
     handled by the producer, and especially by the release callback.
 
 Result lifetimes
-----------------
+''''''''''''''''
 
 The data returned by the ``get_schema`` and ``get_next`` callbacks must be
 released independantly. Their lifetimes are not tied to that of
 ``ArrowDeviceArrayStream``.
 
 Stream lifetime
----------------
+'''''''''''''''
 
 Lifetime of the C stream is managed using a release callback with similar
 usage as in :ref:`C data interface <c-data-interface-released>`.
 
 Thread safety
--------------
+'''''''''''''
 
 The stream source is not assumed to be thread-safe. Consumers wanting to
 call ``get_next`` from several threads should ensure those calls are
@@ -652,9 +651,9 @@ Interoperability with other interchange formats
 Other interchange APIs, such as the `CUDA Array Interface`_, include
 members to pass the shape and the data types of the data buffers being
 exported. This information is necessary to interpret the raw bytes in the
-device data buffers that are being shared. Rather than store the 
-shape / types of the data alongside the ``ArrowDeviceArray``, users 
-should utilize the existing ``ArrowSchema`` structure to pass any data 
+device data buffers that are being shared. Rather than store the
+shape / types of the data alongside the ``ArrowDeviceArray``, users
+should utilize the existing ``ArrowSchema`` structure to pass any data
 type and shape information.
 
 Updating this specification

From b55d13c16eb25f3264645e53cc03aa1f7d753b25 Mon Sep 17 00:00:00 2001
From: Ben Harkins <60872452+benibus@users.noreply.github.com>
Date: Wed, 15 Nov 2023 06:05:07 -0500
Subject: [PATCH 11/23] GH-36036: [C++][Python][Parquet] Implement Float16
 logical type (#36073)

### Rationale for this change

There is currently an active proposal to support half-float types in Parquet. For more details/discussion, see the links in this PR's accompanying issue.

### What changes are included in this PR?

This PR implements basic support for a `Float16LogicalType` in accordance with the proposed spec. More specifically, this includes:

- Changes to `parquet.thrift` and regenerated `parquet_types` files
- Basic `LogicalType` class definition, method impls, and enums
- Support for specialized comparisons and column statistics

In the interest of scope, this PR does not currently deal with arrow integration and byte split encoding - although we will want both of these features resolved before the proposal is approved.

### Are these changes tested?

Yes (tests are included)

### Are there any user-facing changes?

Yes

* Closes: #36036

Lead-authored-by: benibus <bpharks@gmx.com>
Co-authored-by: Ben Harkins <60872452+benibus@users.noreply.github.com>
Co-authored-by: Antoine Pitrou <pitrou@free.fr>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/CMakeLists.txt                  |    1 +
 cpp/src/arrow/util/CMakeLists.txt             |    1 +
 cpp/src/arrow/util/float16.cc                 |  226 ++
 cpp/src/arrow/util/float16.h                  |  209 ++
 cpp/src/arrow/util/float16_test.cc            |  367 +++
 cpp/src/generated/parquet_types.cpp           | 2267 +++++++++--------
 cpp/src/generated/parquet_types.h             |   45 +-
 .../parquet/arrow/arrow_reader_writer_test.cc |   33 +-
 cpp/src/parquet/arrow/arrow_schema_test.cc    |    7 +-
 cpp/src/parquet/arrow/reader_internal.cc      |   25 +
 cpp/src/parquet/arrow/schema.cc               |    5 +
 cpp/src/parquet/arrow/schema_internal.cc      |    2 +
 cpp/src/parquet/arrow/test_util.h             |   21 +-
 cpp/src/parquet/column_writer.cc              |   30 +
 cpp/src/parquet/page_index_test.cc            |   22 +
 cpp/src/parquet/parquet.thrift                |    2 +
 cpp/src/parquet/schema_test.cc                |   29 +-
 cpp/src/parquet/statistics.cc                 |  179 +-
 cpp/src/parquet/statistics_test.cc            |  355 ++-
 cpp/src/parquet/test_util.cc                  |   10 +
 cpp/src/parquet/test_util.h                   |    4 +
 cpp/src/parquet/types.cc                      |   26 +
 cpp/src/parquet/types.h                       |   13 +
 docs/source/cpp/parquet.rst                   |    2 +
 24 files changed, 2670 insertions(+), 1211 deletions(-)
 create mode 100644 cpp/src/arrow/util/float16.cc
 create mode 100644 cpp/src/arrow/util/float16.h
 create mode 100644 cpp/src/arrow/util/float16_test.cc

diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index 101b089ba837f..24e8eefad1523 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -223,6 +223,7 @@ set(ARROW_SRCS
     util/debug.cc
     util/decimal.cc
     util/delimiting.cc
+    util/float16.cc
     util/formatting.cc
     util/future.cc
     util/hashing.cc
diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt
index 3dc8eac1abf64..2e9487dcf50c8 100644
--- a/cpp/src/arrow/util/CMakeLists.txt
+++ b/cpp/src/arrow/util/CMakeLists.txt
@@ -48,6 +48,7 @@ add_arrow_test(utility-test
                checked_cast_test.cc
                compression_test.cc
                decimal_test.cc
+               float16_test.cc
                formatting_util_test.cc
                key_value_metadata_test.cc
                hashing_test.cc
diff --git a/cpp/src/arrow/util/float16.cc b/cpp/src/arrow/util/float16.cc
new file mode 100644
index 0000000000000..5c8b3d10ca0cd
--- /dev/null
+++ b/cpp/src/arrow/util/float16.cc
@@ -0,0 +1,226 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <ostream>
+#include <type_traits>
+
+#include "arrow/util/float16.h"
+#include "arrow/util/ubsan.h"
+
+namespace arrow {
+namespace util {
+
+namespace {
+
+// --------------------------------------------------------
+// Binary conversions
+// --------------------------------------------------------
+// These routines are partially adapted from Numpy's C implementation
+//
+// Some useful metrics for conversions between different precisions:
+// |-----------------------------------------|
+// | precision | half    | single  | double  |
+// |-----------------------------------------|
+// | mantissa  | 10 bits | 23 bits | 52 bits |
+// | exponent  | 5 bits  | 8 bits  | 11 bits |
+// | sign      | 1 bit   | 1 bit   | 1 bit   |
+// | exp bias  | 15      | 127     | 1023    |
+// |-----------------------------------------|
+
+template <typename T>
+struct BinaryConverter {
+  static_assert(std::is_same_v<T, uint32_t> || std::is_same_v<T, uint64_t>);
+
+  static constexpr int kNumBits = sizeof(T) * 8;
+  static constexpr int kMantNumBits = (kNumBits == 32) ? 23 : 52;
+  static constexpr int kExpNumBits = kNumBits - kMantNumBits - 1;
+
+  static constexpr int kExpBias = (1 << (kExpNumBits - 1)) - 1;
+
+  static constexpr T kMantMask = (T(1) << kMantNumBits) - 1;
+  static constexpr T kExpMask = ((T(1) << kExpNumBits) - 1) << kMantNumBits;
+  static constexpr T kSignMask = T(1) << (kNumBits - 1);
+
+  static_assert(kMantNumBits + kExpNumBits + 1 == kNumBits);
+  static_assert(kSignMask + kExpMask + kMantMask == ~T(0));
+
+  static uint16_t ToBinary16(T);
+  static T FromBinary16(uint16_t);
+};
+
+// Converts an IEEE binary32/64 into a binary16. Rounds to nearest with ties to even
+template <typename T>
+uint16_t BinaryConverter<T>::ToBinary16(T f_bits) {
+  // Sign mask for output binary16
+  const uint16_t h_sign = uint16_t((f_bits >> (kNumBits - 16)) & 0x8000);
+
+  // Exponent mask for input binary
+  const T f_exp = f_bits & kExpMask;
+  // Exponents as signed pre-shifted values for convenience. Here, we need to re-bias the
+  // exponent for a binary16. If, after re-biasing, the binary16 exponent falls outside of
+  // the range [1,30] then we need to handle the under/overflow case specially.
+  const int16_t f_biased_exp = int16_t(f_exp >> kMantNumBits);
+  const int16_t unbiased_exp = f_biased_exp - kExpBias;
+  const int16_t h_biased_exp = unbiased_exp + 15;
+
+  // Mantissa mask for input
+  const T f_mant = f_bits & kMantMask;
+
+  // We define a "rounding bit", which is the most significant bit to be dropped
+  // (e.g. for a binary32, 0x1000).
+  constexpr T rounding_bit = T(1) << (kMantNumBits - (10 + 1));
+
+  // Handle exponent overflow, NaN, and +/-Inf
+  if (h_biased_exp >= 0x1f) {
+    // The input is a NaN representation
+    if (f_exp == kExpMask && f_mant != 0) {
+      uint16_t h_mant = uint16_t(f_mant >> (kMantNumBits - 10));
+      // If the mantissa bit(s) indicating NaN were shifted out, add one back. Otherwise,
+      // the result would be infinity.
+      if (h_mant == 0) {
+        h_mant = 0x1;
+      }
+      return uint16_t(h_sign | 0x7c00u | h_mant);
+    }
+
+    // Clamp to +/-infinity
+    return uint16_t(h_sign | 0x7c00u);
+  }
+
+  // Handle exponent underflow, subnormals, and +/-0
+  if (h_biased_exp <= 0) {
+    // If the underflow exceeds the number of bits in a binary16 mantissa (10) then we
+    // can't round, so just clamp to 0. Note that this also weeds out any input values
+    // that are subnormal - including +/-0.
+    if (h_biased_exp < -10) {
+      return h_sign;
+    }
+
+    // Convert to a rounded subnormal value starting with the mantissa. Since the
+    // input is known to be normal at this point, we need to prepend its implicit leading
+    // bit - which also necessitates an additional right-shift.
+    T rounded_mant = (T(1) << kMantNumBits) | f_mant;
+    rounded_mant >>= (1 - h_biased_exp);
+
+    // Here, we implement rounding to nearest (with ties to even)
+    //
+    // By now, our new mantissa has two conceptual ranges:
+    //  - The lower (kMantNumBits - 10) bits, which will be shifted out (13 for binary32)
+    //  - The upper 10 bits, which will become the binary16's mantissa
+    //
+    // "Rounding to nearest" basically just means that we add 1 to the rounding bit. If
+    // it's set, then the bit will cascade upwards into the 10-bit mantissa (and
+    // potentially the exponent). The only time where we may NOT do this is when a "tie"
+    // occurs - i.e. when the rounding bit is set but all of the lower bits are 0. In that
+    // case, we don't add 1 if the retained mantissa is "even" (its least significant bit
+    // is 0).
+    if ((rounded_mant & ((rounding_bit << 2) - 1)) != rounding_bit ||
+        (f_mant & 0x7ffu) != 0) {
+      rounded_mant += rounding_bit;
+    }
+
+    const uint16_t h_mant = uint16_t(rounded_mant >> (kMantNumBits - 10));
+    return h_sign + h_mant;
+  }
+
+  const uint16_t h_exp = uint16_t(h_biased_exp) << 10;
+
+  // See comment on rounding behavior above
+  T rounded_mant = f_mant;
+  if ((rounded_mant & ((rounding_bit << 2) - 1)) != rounding_bit) {
+    rounded_mant += rounding_bit;
+  }
+
+  const uint16_t h_mant = uint16_t(rounded_mant >> (kMantNumBits - 10));
+  // Note that we ADD (rather than OR) the components because we want the carryover bit
+  // from rounding the mantissa to cascade through the exponent (it shouldn't affect the
+  // sign bit though).
+  return h_sign + h_exp + h_mant;
+}
+
+// Converts an IEEE binary16 into a binary32/64
+template <typename T>
+T BinaryConverter<T>::FromBinary16(uint16_t h_bits) {
+  // Sign mask for output
+  const T f_sign = T(h_bits & 0x8000u) << (kNumBits - 16);
+
+  // Exponent mask for input binary16
+  const uint16_t h_exp = h_bits & 0x7c00;
+  // Mantissa mask for input binary16
+  const uint16_t h_mant = h_bits & 0x3ffu;
+
+  switch (h_exp) {
+    // Handle Inf and NaN
+    case 0x7c00u:
+      return f_sign | kExpMask | (T(h_mant) << (kMantNumBits - 10));
+    // Handle zeros and subnormals
+    case 0x0000u: {
+      // Input is +/-0
+      if (h_mant == 0) {
+        return f_sign;
+      }
+      // Subnormal binary16 to normal binary32/64
+      //
+      // Start with an f32/64-biased exponent of 2^-15. We then decrement it until the
+      // most significant set bit is left-shifted out - as it doesn't get explicitly
+      // stored in normalized floating point values. Instead, its existence is implied by
+      // the new exponent.
+      T f_exp = kExpBias - 15;
+      T f_mant = T(h_mant) << 1;
+      while ((f_mant & 0x0400u) == 0) {
+        --f_exp;
+        f_mant <<= 1;
+      }
+      f_exp <<= kMantNumBits;
+      f_mant = (f_mant & 0x03ffu) << (kMantNumBits - 10);
+      return f_sign | f_exp | f_mant;
+    } break;
+    // Handle normals
+    default:
+      // Equivalent to rebiasing the exponent and shifting everything by the remaining
+      // mantissa bits.
+      return f_sign |
+             ((T(h_bits & 0x7fffu) + (T(kExpBias - 15) << 10)) << (kMantNumBits - 10));
+  }
+}
+
+}  // namespace
+
+float Float16::ToFloat() const {
+  const uint32_t f_bits = BinaryConverter<uint32_t>::FromBinary16(bits_);
+  return SafeCopy<float>(f_bits);
+}
+
+Float16 Float16::FromFloat(float f) {
+  const uint32_t f_bits = SafeCopy<uint32_t>(f);
+  return FromBits(BinaryConverter<uint32_t>::ToBinary16(f_bits));
+}
+
+double Float16::ToDouble() const {
+  const uint64_t d_bits = BinaryConverter<uint64_t>::FromBinary16(bits_);
+  return SafeCopy<double>(d_bits);
+}
+
+Float16 Float16::FromDouble(double d) {
+  const uint64_t d_bits = SafeCopy<uint64_t>(d);
+  return FromBits(BinaryConverter<uint64_t>::ToBinary16(d_bits));
+}
+
+std::ostream& operator<<(std::ostream& os, Float16 arg) { return (os << arg.ToFloat()); }
+
+}  // namespace util
+}  // namespace arrow
diff --git a/cpp/src/arrow/util/float16.h b/cpp/src/arrow/util/float16.h
new file mode 100644
index 0000000000000..0a432fee2cd31
--- /dev/null
+++ b/cpp/src/arrow/util/float16.h
@@ -0,0 +1,209 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <array>
+#include <cstdint>
+#include <cstring>
+#include <iosfwd>
+#include <limits>
+#include <type_traits>
+
+#include "arrow/util/endian.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/ubsan.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace util {
+
+/// \brief Class representing an IEEE half-precision float, encoded as a `uint16_t`
+///
+/// The exact format is as follows (from LSB to MSB):
+/// - bits 0-9:   mantissa
+/// - bits 10-14: exponent
+/// - bit 15:     sign
+///
+class ARROW_EXPORT Float16 {
+ public:
+  Float16() = default;
+  explicit Float16(float f) : Float16(FromFloat(f)) {}
+  explicit Float16(double d) : Float16(FromDouble(d)) {}
+  template <typename T,
+            typename std::enable_if_t<std::is_convertible_v<T, double>>* = NULLPTR>
+  explicit Float16(T v) : Float16(static_cast<double>(v)) {}
+
+  /// \brief Create a `Float16` from its exact binary representation
+  constexpr static Float16 FromBits(uint16_t bits) { return Float16{bits, bool{}}; }
+  /// \brief Create a `Float16` from a 32-bit float (may lose precision)
+  static Float16 FromFloat(float f);
+  /// \brief Create a `Float16` from a 64-bit float (may lose precision)
+  static Float16 FromDouble(double d);
+
+  /// \brief Read a `Float16` from memory in native-endian byte order
+  static Float16 FromBytes(const uint8_t* src) {
+    return FromBits(SafeLoadAs<uint16_t>(src));
+  }
+
+  /// \brief Read a `Float16` from memory in little-endian byte order
+  static Float16 FromLittleEndian(const uint8_t* src) {
+    return FromBits(::arrow::bit_util::FromLittleEndian(SafeLoadAs<uint16_t>(src)));
+  }
+
+  /// \brief Read a `Float16` from memory in big-endian byte order
+  static Float16 FromBigEndian(const uint8_t* src) {
+    return FromBits(::arrow::bit_util::FromBigEndian(SafeLoadAs<uint16_t>(src)));
+  }
+
+  /// \brief Return the value's binary representation as a `uint16_t`
+  constexpr uint16_t bits() const { return bits_; }
+
+  /// \brief Return true if the value is negative (sign bit is set)
+  constexpr bool signbit() const { return (bits_ & 0x8000) != 0; }
+
+  /// \brief Return true if the value is NaN
+  constexpr bool is_nan() const { return (bits_ & 0x7fff) > 0x7c00; }
+  /// \brief Return true if the value is positive/negative infinity
+  constexpr bool is_infinity() const { return (bits_ & 0x7fff) == 0x7c00; }
+  /// \brief Return true if the value is finite and not NaN
+  constexpr bool is_finite() const { return (bits_ & 0x7c00) != 0x7c00; }
+  /// \brief Return true if the value is positive/negative zero
+  constexpr bool is_zero() const { return (bits_ & 0x7fff) == 0; }
+
+  /// \brief Convert to a 32-bit float
+  float ToFloat() const;
+  /// \brief Convert to a 64-bit float
+  double ToDouble() const;
+
+  explicit operator float() const { return ToFloat(); }
+  explicit operator double() const { return ToDouble(); }
+
+  /// \brief Copy the value's bytes in native-endian byte order
+  void ToBytes(uint8_t* dest) const { std::memcpy(dest, &bits_, sizeof(bits_)); }
+  /// \brief Return the value's bytes in native-endian byte order
+  constexpr std::array<uint8_t, 2> ToBytes() const {
+#if ARROW_LITTLE_ENDIAN
+    return ToLittleEndian();
+#else
+    return ToBigEndian();
+#endif
+  }
+
+  /// \brief Copy the value's bytes in little-endian byte order
+  void ToLittleEndian(uint8_t* dest) const {
+    const auto bytes = ToLittleEndian();
+    std::memcpy(dest, bytes.data(), bytes.size());
+  }
+  /// \brief Return the value's bytes in little-endian byte order
+  ///
+  /// The least significant byte comes first on every host.
+  constexpr std::array<uint8_t, 2> ToLittleEndian() const {
+    // Masking and shifting act on the numeric value rather than its memory layout,
+    // so no host-endianness switch is needed here.
+    return {uint8_t(bits_ & 0xff), uint8_t(bits_ >> 8)};
+  }
+
+  /// \brief Copy the value's bytes in big-endian byte order
+  void ToBigEndian(uint8_t* dest) const {
+    const auto bytes = ToBigEndian();
+    std::memcpy(dest, bytes.data(), bytes.size());
+  }
+  /// \brief Return the value's bytes in big-endian byte order
+  ///
+  /// The most significant byte comes first on every host.
+  constexpr std::array<uint8_t, 2> ToBigEndian() const {
+    // Masking and shifting act on the numeric value rather than its memory layout,
+    // so no host-endianness switch is needed here.
+    return {uint8_t(bits_ >> 8), uint8_t(bits_ & 0xff)};
+  }
+
+  constexpr Float16 operator-() const { return FromBits(bits_ ^ 0x8000); }
+  constexpr Float16 operator+() const { return FromBits(bits_); }
+
+  friend constexpr bool operator==(Float16 lhs, Float16 rhs) {
+    if (lhs.is_nan() || rhs.is_nan()) return false;
+    return Float16::CompareEq(lhs, rhs);
+  }
+  friend constexpr bool operator!=(Float16 lhs, Float16 rhs) { return !(lhs == rhs); }
+
+  friend constexpr bool operator<(Float16 lhs, Float16 rhs) {
+    if (lhs.is_nan() || rhs.is_nan()) return false;
+    return Float16::CompareLt(lhs, rhs);
+  }
+  friend constexpr bool operator>(Float16 lhs, Float16 rhs) { return rhs < lhs; }
+
+  friend constexpr bool operator<=(Float16 lhs, Float16 rhs) {
+    if (lhs.is_nan() || rhs.is_nan()) return false;
+    return !Float16::CompareLt(rhs, lhs);
+  }
+  friend constexpr bool operator>=(Float16 lhs, Float16 rhs) { return rhs <= lhs; }
+
+  ARROW_FRIEND_EXPORT friend std::ostream& operator<<(std::ostream& os, Float16 arg);
+
+ protected:
+  uint16_t bits_;
+
+ private:
+  constexpr Float16(uint16_t bits, bool) : bits_(bits) {}
+
+  // Comparison helpers that assume neither operand is NaN
+  static constexpr bool CompareEq(Float16 lhs, Float16 rhs) {
+    return (lhs.bits() == rhs.bits()) || (lhs.is_zero() && rhs.is_zero());
+  }
+  static constexpr bool CompareLt(Float16 lhs, Float16 rhs) {
+    if (lhs.signbit()) {
+      if (rhs.signbit()) {
+        // Both are negative
+        return lhs.bits() > rhs.bits();
+      } else {
+        // Handle +/-0
+        return !lhs.is_zero() || rhs.bits() != 0;
+      }
+    } else if (rhs.signbit()) {
+      return false;
+    } else {
+      // Both are positive
+      return lhs.bits() < rhs.bits();
+    }
+  }
+};
+
+static_assert(std::is_trivial_v<Float16>);
+
+}  // namespace util
+}  // namespace arrow
+
+// TODO: Not complete
+template <>
+class std::numeric_limits<arrow::util::Float16> {
+  using T = arrow::util::Float16;
+
+ public:
+  static constexpr bool is_specialized = true;
+  static constexpr bool is_signed = true;
+  static constexpr bool has_infinity = true;
+  static constexpr bool has_quiet_NaN = true;
+
+  static constexpr T min() { return T::FromBits(0b0000010000000000); }
+  static constexpr T max() { return T::FromBits(0b0111101111111111); }
+  static constexpr T lowest() { return -max(); }
+
+  static constexpr T infinity() { return T::FromBits(0b0111110000000000); }
+
+  static constexpr T quiet_NaN() { return T::FromBits(0b0111111111111111); }
+};
diff --git a/cpp/src/arrow/util/float16_test.cc b/cpp/src/arrow/util/float16_test.cc
new file mode 100644
index 0000000000000..073375882e3c2
--- /dev/null
+++ b/cpp/src/arrow/util/float16_test.cc
@@ -0,0 +1,367 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <array>
+#include <cmath>
+#include <utility>
+
+#include <gtest/gtest.h>
+
+#include "arrow/testing/gtest_util.h"
+#include "arrow/util/endian.h"
+#include "arrow/util/float16.h"
+#include "arrow/util/span.h"
+#include "arrow/util/ubsan.h"
+
+namespace arrow::util {
+namespace {
+
+template <typename T>
+using Limits = std::numeric_limits<T>;
+
+float F32(uint32_t bits) { return SafeCopy<float>(bits); }
+double F64(uint64_t bits) { return SafeCopy<double>(bits); }
+
+template <typename T>
+class Float16ConversionTest : public ::testing::Test {
+ public:
+  struct RoundTripTestCase {
+    T input;
+    uint16_t bits;
+    T output;
+  };
+
+  static void TestRoundTrip(span<const RoundTripTestCase> test_cases) {
+    for (size_t index = 0; index < test_cases.size(); ++index) {
+      ARROW_SCOPED_TRACE("i=", index);
+      const auto& tc = test_cases[index];
+
+      const auto f16 = Float16(tc.input);
+      EXPECT_EQ(tc.bits, f16.bits());
+      EXPECT_EQ(tc.output, static_cast<T>(f16));
+
+      EXPECT_EQ(std::signbit(tc.output), f16.signbit());
+      EXPECT_EQ(std::isnan(tc.output), f16.is_nan());
+      EXPECT_EQ(std::isinf(tc.output), f16.is_infinity());
+      EXPECT_EQ(std::isfinite(tc.output), f16.is_finite());
+    }
+  }
+
+  static void TestRoundTripFromNaN(span<const T> test_cases) {
+    for (size_t i = 0; i < test_cases.size(); ++i) {
+      ARROW_SCOPED_TRACE("i=", i);
+      const auto input = test_cases[i];
+
+      ASSERT_TRUE(std::isnan(input));
+      const bool sign = std::signbit(input);
+
+      const auto f16 = Float16(input);
+      EXPECT_TRUE(f16.is_nan());
+      EXPECT_EQ(std::isinf(input), f16.is_infinity());
+      EXPECT_EQ(std::isfinite(input), f16.is_finite());
+      EXPECT_EQ(sign, f16.signbit());
+
+      const auto output = static_cast<T>(f16);
+      EXPECT_TRUE(std::isnan(output));
+      EXPECT_EQ(sign, std::signbit(output));
+    }
+  }
+
+  void TestRoundTripFromInf() {
+    const T test_cases[] = {+Limits<T>::infinity(), -Limits<T>::infinity()};
+
+    for (size_t i = 0; i < std::size(test_cases); ++i) {
+      ARROW_SCOPED_TRACE("i=", i);
+      const auto input = test_cases[i];
+
+      ASSERT_TRUE(std::isinf(input));
+      const bool sign = std::signbit(input);
+
+      const auto f16 = Float16(input);
+      EXPECT_TRUE(f16.is_infinity());
+      EXPECT_EQ(std::isfinite(input), f16.is_finite());
+      EXPECT_EQ(std::isnan(input), f16.is_nan());
+      EXPECT_EQ(sign, f16.signbit());
+
+      const auto output = static_cast<T>(f16);
+      EXPECT_TRUE(std::isinf(output));
+      EXPECT_EQ(sign, std::signbit(output));
+    }
+  }
+
+  void TestRoundTrip();
+  void TestRoundTripFromNaN();
+};
+
+template <>
+void Float16ConversionTest<float>::TestRoundTrip() {
+  // Expected values were also manually validated with numpy-1.24.3
+  const RoundTripTestCase test_cases[] = {
+      // +/-0.0f
+      {F32(0x80000000u), 0b1000000000000000u, -0.0f},
+      {F32(0x00000000u), 0b0000000000000000u, +0.0f},
+      // 32-bit exp is 102 => 2^-25. Rounding to nearest.
+      {F32(0xb3000001u), 0b1000000000000001u, -5.96046447754e-8f},
+      // 32-bit exp is 102 => 2^-25. Rounding to even.
+      {F32(0xb3000000u), 0b1000000000000000u, -0.0f},
+      // 32-bit exp is 101 => 2^-26. Underflow to zero.
+      {F32(0xb2800001u), 0b1000000000000000u, -0.0f},
+      // 32-bit exp is 108 => 2^-19.
+      {F32(0xb61a0000u), 0b1000000000100110u, -2.26497650146e-6f},
+      // 32-bit exp is 108 => 2^-19.
+      {F32(0xb61e0000u), 0b1000000000101000u, -2.38418579102e-6f},
+      // 32-bit exp is 112 => 2^-15. Rounding to nearest.
+      {F32(0xb87fa001u), 0b1000001111111111u, -6.09755516052e-5f},
+      // 32-bit exp is 112 => 2^-15. Rounds to 16-bit exp of 1 => 2^-14
+      {F32(0xb87fe001u), 0b1000010000000000u, -6.103515625e-5f},
+      // 32-bit exp is 142 => 2^15. Rounding to nearest.
+      {F32(0xc7001001u), 0b1111100000000001u, -32800.0f},
+      // 32-bit exp is 142 => 2^15. Rounding to even.
+      {F32(0xc7001000u), 0b1111100000000000u, -32768.0f},
+      // 65520.0f rounds to inf
+      {F32(0x477ff000u), 0b0111110000000000u, Limits<float>::infinity()},
+      // 65488.0039062f rounds to 65504.0 (float16 max)
+      {F32(0x477fd001u), 0b0111101111111111u, 65504.0f},
+      // 32-bit exp is 127 => 2^0, rounds to 16-bit exp of 16 => 2^1.
+      {F32(0xbffff000u), 0b1100000000000000u, -2.0f},
+      // Extreme values should safely clamp to +/-inf
+      {Limits<float>::max(), 0b0111110000000000u, +Limits<float>::infinity()},
+      {Limits<float>::lowest(), 0b1111110000000000u, -Limits<float>::infinity()},
+  };
+
+  TestRoundTrip(span(test_cases, std::size(test_cases)));
+}
+
+template <>
+void Float16ConversionTest<double>::TestRoundTrip() {
+  // Expected values were also manually validated with numpy-1.24.3
+  const RoundTripTestCase test_cases[] = {
+      // +/-0.0
+      {F64(0x8000000000000000u), 0b1000000000000000u, -0.0},
+      {F64(0x0000000000000000u), 0b0000000000000000u, +0.0},
+      // 64-bit exp is 998 => 2^-25. Rounding to nearest.
+      {F64(0xbe60000000000001u), 0b1000000000000001u, -5.9604644775390625e-8},
+      // 64-bit exp is 998 => 2^-25. Rounding to even.
+      {F64(0xbe60000000000000u), 0b1000000000000000u, -0.0},
+      // 64-bit exp is 997 => 2^-26. Underflow to zero.
+      {F64(0xbe50000000000001u), 0b1000000000000000u, -0.0},
+      // 64-bit exp is 1004 => 2^-19.
+      {F64(0xbec3400000000000u), 0b1000000000100110u, -2.2649765014648438e-6},
+      // 64-bit exp is 1004 => 2^-19.
+      {F64(0xbec3c00000000000u), 0b1000000000101000u, -2.3841857910156250e-6},
+      // 64-bit exp is 1008 => 2^-15. Rounding to nearest.
+      {F64(0xbf0ff40000000001u), 0b1000001111111111u, -6.0975551605224609e-5},
+      // 64-bit exp is 1008 => 2^-15. Rounds to 16-bit exp of 1 => 2^-14
+      {F64(0xbf0ffc0000000001u), 0b1000010000000000u, -6.1035156250000000e-5},
+      // 64-bit exp is 1038 => 2^15. Rounding to nearest.
+      {F64(0xc0e0020000000001u), 0b1111100000000001u, -32800.0},
+      // 64-bit exp is 1038 => 2^15. Rounding to even.
+      {F64(0xc0e0020000000000u), 0b1111100000000000u, -32768.0},
+      // 65520.0 rounds to inf
+      {F64(0x40effe0000000000u), 0b0111110000000000u, Limits<double>::infinity()},
+      // 65488.00000000001 rounds to 65504.0 (float16 max)
+      {F64(0x40effa0000000001u), 0b0111101111111111u, 65504.0},
+      // 64-bit exp is 1023 => 2^0, rounds to 16-bit exp of 16 => 2^1.
+      {F64(0xbffffe0000000000u), 0b1100000000000000u, -2.0},
+      // Extreme values should safely clamp to +/-inf
+      {Limits<double>::max(), 0b0111110000000000u, +Limits<double>::infinity()},
+      {Limits<double>::lowest(), 0b1111110000000000u, -Limits<double>::infinity()},
+  };
+
+  TestRoundTrip(span(test_cases, std::size(test_cases)));
+}
+
+template <>
+void Float16ConversionTest<float>::TestRoundTripFromNaN() {
+  const float test_cases[] = {
+      Limits<float>::quiet_NaN(), F32(0x7f800001u), F32(0xff800001u), F32(0x7fc00000u),
+      F32(0xffc00000u),           F32(0x7fffffffu), F32(0xffffffffu)};
+  TestRoundTripFromNaN(span(test_cases, std::size(test_cases)));
+}
+
+template <>
+void Float16ConversionTest<double>::TestRoundTripFromNaN() {
+  const double test_cases[] = {Limits<double>::quiet_NaN(), F64(0x7ff0000000000001u),
+                               F64(0xfff0000000000001u),    F64(0x7ff8000000000000u),
+                               F64(0xfff8000000000000u),    F64(0x7fffffffffffffffu),
+                               F64(0xffffffffffffffffu)};
+  TestRoundTripFromNaN(span(test_cases, std::size(test_cases)));
+}
+
+using NativeFloatTypes = ::testing::Types<float, double>;
+
+TYPED_TEST_SUITE(Float16ConversionTest, NativeFloatTypes);
+
+TYPED_TEST(Float16ConversionTest, RoundTrip) { this->TestRoundTrip(); }
+TYPED_TEST(Float16ConversionTest, RoundTripFromNaN) { this->TestRoundTripFromNaN(); }
+TYPED_TEST(Float16ConversionTest, RoundTripFromInf) { this->TestRoundTripFromInf(); }
+
+TEST(Float16Test, ConstexprFunctions) {
+  constexpr auto a = Float16::FromBits(0xbc00);  // -1.0
+  constexpr auto b = Float16::FromBits(0x3c00);  // +1.0
+
+  static_assert(a.bits() == 0xbc00);
+  static_assert(a.signbit() == true);
+  static_assert(a.is_nan() == false);
+  static_assert(a.is_infinity() == false);
+  static_assert(a.is_finite() == true);
+  static_assert(a.is_zero() == false);
+
+  static_assert((a == b) == false);
+  static_assert((a != b) == true);
+  static_assert((a < b) == true);
+  static_assert((a > b) == false);
+  static_assert((a <= b) == true);
+  static_assert((a >= b) == false);
+  static_assert(-a == +b);
+
+  constexpr auto v = Float16::FromBits(0xffff);
+  static_assert(v.ToBytes()[0] == 0xff);
+  static_assert(v.ToLittleEndian()[0] == 0xff);
+  static_assert(v.ToBigEndian()[0] == 0xff);
+}
+
+TEST(Float16Test, Constructors) {
+  // Construction from exact bits
+  ASSERT_EQ(1, Float16::FromBits(1).bits());
+  // Construction from floating point (including implicit conversions)
+  int i = 0;
+  for (auto f16 : {Float16(1.0f), Float16(1.0), Float16(1)}) {
+    ARROW_SCOPED_TRACE("i=", i++);
+    ASSERT_EQ(0x3c00, f16.bits());
+  }
+}
+
+TEST(Float16Test, Compare) {
+  constexpr float f32_inf = Limits<float>::infinity();
+  constexpr float f32_nan = Limits<float>::quiet_NaN();
+
+  const struct {
+    Float16 f16;
+    float f32;
+  } test_values[] = {
+      {Limits<Float16>::min(), +6.103515625e-05f},
+      {Limits<Float16>::max(), +65504.0f},
+      {Limits<Float16>::lowest(), -65504.0f},
+      {+Limits<Float16>::infinity(), +f32_inf},
+      {-Limits<Float16>::infinity(), -f32_inf},
+      // Multiple (semantically equivalent) NaN representations
+      {Float16::FromBits(0x7e00), f32_nan},
+      {Float16::FromBits(0xfe00), f32_nan},
+      {Float16::FromBits(0x7fff), f32_nan},
+      {Float16::FromBits(0xffff), f32_nan},
+      // Positive/negative zeros
+      {Float16::FromBits(0x0000), +0.0f},
+      {Float16::FromBits(0x8000), -0.0f},
+      // Miscellaneous values. In general, they're chosen to test the sign/exponent and
+      // exponent/mantissa boundaries
+      {Float16::FromBits(0x101c), +0.00050163269043f},
+      {Float16::FromBits(0x901c), -0.00050163269043f},
+      {Float16::FromBits(0x101d), +0.000502109527588f},
+      {Float16::FromBits(0x901d), -0.000502109527588f},
+      {Float16::FromBits(0x121c), +0.00074577331543f},
+      {Float16::FromBits(0x921c), -0.00074577331543f},
+      {Float16::FromBits(0x141c), +0.00100326538086f},
+      {Float16::FromBits(0x941c), -0.00100326538086f},
+      {Float16::FromBits(0x501c), +32.875f},
+      {Float16::FromBits(0xd01c), -32.875f},
+      // A few subnormals for good measure
+      {Float16::FromBits(0x001c), +1.66893005371e-06f},
+      {Float16::FromBits(0x801c), -1.66893005371e-06f},
+      {Float16::FromBits(0x021c), +3.21865081787e-05f},
+      {Float16::FromBits(0x821c), -3.21865081787e-05f},
+  };
+
+  auto expect_op = [&](std::string op_name, auto op) {
+    ARROW_SCOPED_TRACE(op_name);
+    const auto num_values = static_cast<int>(std::size(test_values));
+
+    // Check all combinations of operands in both directions
+    for (int i = 0; i < num_values; ++i) {
+      for (int j = 0; j < num_values; ++j) {
+        auto [a16, a32] = test_values[i];
+        auto [b16, b32] = test_values[j];
+        ARROW_SCOPED_TRACE("[", i, ",", j, "] = ", a16, ",", b16);
+
+        // Results for float16 and float32 should be the same
+        ASSERT_EQ(op(a16, b16), op(a32, b32));
+      }
+    }
+  };
+
+  // Verify that our "equivalent" 16/32-bit values actually are
+  for (const auto& v : test_values) {
+    if (std::isnan(v.f32)) {
+      ASSERT_TRUE(std::isnan(v.f16.ToFloat()));
+    } else {
+      ASSERT_EQ(v.f32, v.f16.ToFloat());
+    }
+  }
+
+  expect_op("equal", [](auto l, auto r) { return l == r; });
+  expect_op("not_equal", [](auto l, auto r) { return l != r; });
+  expect_op("less", [](auto l, auto r) { return l < r; });
+  expect_op("greater", [](auto l, auto r) { return l > r; });
+  expect_op("less_equal", [](auto l, auto r) { return l <= r; });
+  expect_op("greater_equal", [](auto l, auto r) { return l >= r; });
+}
+
+TEST(Float16Test, ToBytes) {
+  constexpr auto f16 = Float16::FromBits(0xd01c);
+  std::array<uint8_t, 2> bytes;
+  auto load = [&bytes]() { return SafeLoadAs<uint16_t>(bytes.data()); };
+
+  // Test native-endian
+  f16.ToBytes(bytes.data());
+  ASSERT_EQ(load(), 0xd01c);
+  bytes = f16.ToBytes();
+  ASSERT_EQ(load(), 0xd01c);
+
+#if ARROW_LITTLE_ENDIAN
+  constexpr uint16_t expected_le = 0xd01c;
+  constexpr uint16_t expected_be = 0x1cd0;
+#else
+  constexpr uint16_t expected_le = 0x1cd0;
+  constexpr uint16_t expected_be = 0xd01c;
+#endif
+  // Test little-endian
+  f16.ToLittleEndian(bytes.data());
+  ASSERT_EQ(load(), expected_le);
+  bytes = f16.ToLittleEndian();
+  ASSERT_EQ(load(), expected_le);
+  // Test big-endian
+  f16.ToBigEndian(bytes.data());
+  ASSERT_EQ(load(), expected_be);
+  bytes = f16.ToBigEndian();
+  ASSERT_EQ(load(), expected_be);
+}
+
+TEST(Float16Test, FromBytes) {
+  constexpr uint16_t u16 = 0xd01c;  // laid out in the host's native byte order
+  const auto* data = reinterpret_cast<const uint8_t*>(&u16);
+  ASSERT_EQ(Float16::FromBytes(data), Float16::FromBits(0xd01c));
+#if ARROW_LITTLE_ENDIAN
+  ASSERT_EQ(Float16::FromLittleEndian(data), Float16::FromBits(0xd01c));
+  ASSERT_EQ(Float16::FromBigEndian(data), Float16::FromBits(0x1cd0));
+#else  // Float16(int) converts the numeric value, so use FromBits for bit patterns
+  ASSERT_EQ(Float16::FromLittleEndian(data), Float16::FromBits(0x1cd0));
+  ASSERT_EQ(Float16::FromBigEndian(data), Float16::FromBits(0xd01c));
+#endif
+}
+
+}  // namespace
+}  // namespace arrow::util
diff --git a/cpp/src/generated/parquet_types.cpp b/cpp/src/generated/parquet_types.cpp
index f4e378fd3822a..86188581e0c42 100644
--- a/cpp/src/generated/parquet_types.cpp
+++ b/cpp/src/generated/parquet_types.cpp
@@ -1288,6 +1288,81 @@ void DateType::printTo(std::ostream& out) const {
 }
 
 
+Float16Type::~Float16Type() noexcept {
+}
+
+std::ostream& operator<<(std::ostream& out, const Float16Type& obj)
+{
+  obj.printTo(out);
+  return out;
+}
+
+
+uint32_t Float16Type::read(::apache::thrift::protocol::TProtocol* iprot) {
+
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {
+      break;
+    }
+    xfer += iprot->skip(ftype);
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  return xfer;
+}
+
+uint32_t Float16Type::write(::apache::thrift::protocol::TProtocol* oprot) const {
+  uint32_t xfer = 0;
+  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+  xfer += oprot->writeStructBegin("Float16Type");
+
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+void swap(Float16Type &a, Float16Type &b) {
+  using ::std::swap;
+  (void) a;
+  (void) b;
+}
+
+Float16Type::Float16Type(const Float16Type& other28) noexcept {
+  (void) other28;
+}
+Float16Type::Float16Type(Float16Type&& other29) noexcept {
+  (void) other29;
+}
+Float16Type& Float16Type::operator=(const Float16Type& other30) noexcept {
+  (void) other30;
+  return *this;
+}
+Float16Type& Float16Type::operator=(Float16Type&& other31) noexcept {
+  (void) other31;
+  return *this;
+}
+void Float16Type::printTo(std::ostream& out) const {
+  using ::apache::thrift::to_string;
+  out << "Float16Type(";
+  out << ")";
+}
+
+
 NullType::~NullType() noexcept {
 }
 
@@ -1342,18 +1417,18 @@ void swap(NullType &a, NullType &b) {
   (void) b;
 }
 
-NullType::NullType(const NullType& other28) noexcept {
-  (void) other28;
+NullType::NullType(const NullType& other32) noexcept {
+  (void) other32;
 }
-NullType::NullType(NullType&& other29) noexcept {
-  (void) other29;
+NullType::NullType(NullType&& other33) noexcept {
+  (void) other33;
 }
-NullType& NullType::operator=(const NullType& other30) noexcept {
-  (void) other30;
+NullType& NullType::operator=(const NullType& other34) noexcept {
+  (void) other34;
   return *this;
 }
-NullType& NullType::operator=(NullType&& other31) noexcept {
-  (void) other31;
+NullType& NullType::operator=(NullType&& other35) noexcept {
+  (void) other35;
   return *this;
 }
 void NullType::printTo(std::ostream& out) const {
@@ -1460,22 +1535,22 @@ void swap(DecimalType &a, DecimalType &b) {
   swap(a.precision, b.precision);
 }
 
-DecimalType::DecimalType(const DecimalType& other32) noexcept {
-  scale = other32.scale;
-  precision = other32.precision;
+DecimalType::DecimalType(const DecimalType& other36) noexcept {
+  scale = other36.scale;
+  precision = other36.precision;
 }
-DecimalType::DecimalType(DecimalType&& other33) noexcept {
-  scale = other33.scale;
-  precision = other33.precision;
+DecimalType::DecimalType(DecimalType&& other37) noexcept {
+  scale = other37.scale;
+  precision = other37.precision;
 }
-DecimalType& DecimalType::operator=(const DecimalType& other34) noexcept {
-  scale = other34.scale;
-  precision = other34.precision;
+DecimalType& DecimalType::operator=(const DecimalType& other38) noexcept {
+  scale = other38.scale;
+  precision = other38.precision;
   return *this;
 }
-DecimalType& DecimalType::operator=(DecimalType&& other35) noexcept {
-  scale = other35.scale;
-  precision = other35.precision;
+DecimalType& DecimalType::operator=(DecimalType&& other39) noexcept {
+  scale = other39.scale;
+  precision = other39.precision;
   return *this;
 }
 void DecimalType::printTo(std::ostream& out) const {
@@ -1541,18 +1616,18 @@ void swap(MilliSeconds &a, MilliSeconds &b) {
   (void) b;
 }
 
-MilliSeconds::MilliSeconds(const MilliSeconds& other36) noexcept {
-  (void) other36;
+MilliSeconds::MilliSeconds(const MilliSeconds& other40) noexcept {
+  (void) other40;
 }
-MilliSeconds::MilliSeconds(MilliSeconds&& other37) noexcept {
-  (void) other37;
+MilliSeconds::MilliSeconds(MilliSeconds&& other41) noexcept {
+  (void) other41;
 }
-MilliSeconds& MilliSeconds::operator=(const MilliSeconds& other38) noexcept {
-  (void) other38;
+MilliSeconds& MilliSeconds::operator=(const MilliSeconds& other42) noexcept {
+  (void) other42;
   return *this;
 }
-MilliSeconds& MilliSeconds::operator=(MilliSeconds&& other39) noexcept {
-  (void) other39;
+MilliSeconds& MilliSeconds::operator=(MilliSeconds&& other43) noexcept {
+  (void) other43;
   return *this;
 }
 void MilliSeconds::printTo(std::ostream& out) const {
@@ -1616,18 +1691,18 @@ void swap(MicroSeconds &a, MicroSeconds &b) {
   (void) b;
 }
 
-MicroSeconds::MicroSeconds(const MicroSeconds& other40) noexcept {
-  (void) other40;
+MicroSeconds::MicroSeconds(const MicroSeconds& other44) noexcept {
+  (void) other44;
 }
-MicroSeconds::MicroSeconds(MicroSeconds&& other41) noexcept {
-  (void) other41;
+MicroSeconds::MicroSeconds(MicroSeconds&& other45) noexcept {
+  (void) other45;
 }
-MicroSeconds& MicroSeconds::operator=(const MicroSeconds& other42) noexcept {
-  (void) other42;
+MicroSeconds& MicroSeconds::operator=(const MicroSeconds& other46) noexcept {
+  (void) other46;
   return *this;
 }
-MicroSeconds& MicroSeconds::operator=(MicroSeconds&& other43) noexcept {
-  (void) other43;
+MicroSeconds& MicroSeconds::operator=(MicroSeconds&& other47) noexcept {
+  (void) other47;
   return *this;
 }
 void MicroSeconds::printTo(std::ostream& out) const {
@@ -1691,18 +1766,18 @@ void swap(NanoSeconds &a, NanoSeconds &b) {
   (void) b;
 }
 
-NanoSeconds::NanoSeconds(const NanoSeconds& other44) noexcept {
-  (void) other44;
+NanoSeconds::NanoSeconds(const NanoSeconds& other48) noexcept {
+  (void) other48;
 }
-NanoSeconds::NanoSeconds(NanoSeconds&& other45) noexcept {
-  (void) other45;
+NanoSeconds::NanoSeconds(NanoSeconds&& other49) noexcept {
+  (void) other49;
 }
-NanoSeconds& NanoSeconds::operator=(const NanoSeconds& other46) noexcept {
-  (void) other46;
+NanoSeconds& NanoSeconds::operator=(const NanoSeconds& other50) noexcept {
+  (void) other50;
   return *this;
 }
-NanoSeconds& NanoSeconds::operator=(NanoSeconds&& other47) noexcept {
-  (void) other47;
+NanoSeconds& NanoSeconds::operator=(NanoSeconds&& other51) noexcept {
+  (void) other51;
   return *this;
 }
 void NanoSeconds::printTo(std::ostream& out) const {
@@ -1827,30 +1902,30 @@ void swap(TimeUnit &a, TimeUnit &b) {
   swap(a.__isset, b.__isset);
 }
 
-TimeUnit::TimeUnit(const TimeUnit& other48) noexcept {
-  MILLIS = other48.MILLIS;
-  MICROS = other48.MICROS;
-  NANOS = other48.NANOS;
-  __isset = other48.__isset;
+TimeUnit::TimeUnit(const TimeUnit& other52) noexcept {
+  MILLIS = other52.MILLIS;
+  MICROS = other52.MICROS;
+  NANOS = other52.NANOS;
+  __isset = other52.__isset;
 }
-TimeUnit::TimeUnit(TimeUnit&& other49) noexcept {
-  MILLIS = std::move(other49.MILLIS);
-  MICROS = std::move(other49.MICROS);
-  NANOS = std::move(other49.NANOS);
-  __isset = other49.__isset;
+TimeUnit::TimeUnit(TimeUnit&& other53) noexcept {
+  MILLIS = std::move(other53.MILLIS);
+  MICROS = std::move(other53.MICROS);
+  NANOS = std::move(other53.NANOS);
+  __isset = other53.__isset;
 }
-TimeUnit& TimeUnit::operator=(const TimeUnit& other50) noexcept {
-  MILLIS = other50.MILLIS;
-  MICROS = other50.MICROS;
-  NANOS = other50.NANOS;
-  __isset = other50.__isset;
+TimeUnit& TimeUnit::operator=(const TimeUnit& other54) noexcept {
+  MILLIS = other54.MILLIS;
+  MICROS = other54.MICROS;
+  NANOS = other54.NANOS;
+  __isset = other54.__isset;
   return *this;
 }
-TimeUnit& TimeUnit::operator=(TimeUnit&& other51) noexcept {
-  MILLIS = std::move(other51.MILLIS);
-  MICROS = std::move(other51.MICROS);
-  NANOS = std::move(other51.NANOS);
-  __isset = other51.__isset;
+TimeUnit& TimeUnit::operator=(TimeUnit&& other55) noexcept {
+  MILLIS = std::move(other55.MILLIS);
+  MICROS = std::move(other55.MICROS);
+  NANOS = std::move(other55.NANOS);
+  __isset = other55.__isset;
   return *this;
 }
 void TimeUnit::printTo(std::ostream& out) const {
@@ -1960,22 +2035,22 @@ void swap(TimestampType &a, TimestampType &b) {
   swap(a.unit, b.unit);
 }
 
-TimestampType::TimestampType(const TimestampType& other52) noexcept {
-  isAdjustedToUTC = other52.isAdjustedToUTC;
-  unit = other52.unit;
+TimestampType::TimestampType(const TimestampType& other56) noexcept {
+  isAdjustedToUTC = other56.isAdjustedToUTC;
+  unit = other56.unit;
 }
-TimestampType::TimestampType(TimestampType&& other53) noexcept {
-  isAdjustedToUTC = other53.isAdjustedToUTC;
-  unit = std::move(other53.unit);
+TimestampType::TimestampType(TimestampType&& other57) noexcept {
+  isAdjustedToUTC = other57.isAdjustedToUTC;
+  unit = std::move(other57.unit);
 }
-TimestampType& TimestampType::operator=(const TimestampType& other54) noexcept {
-  isAdjustedToUTC = other54.isAdjustedToUTC;
-  unit = other54.unit;
+TimestampType& TimestampType::operator=(const TimestampType& other58) noexcept {
+  isAdjustedToUTC = other58.isAdjustedToUTC;
+  unit = other58.unit;
   return *this;
 }
-TimestampType& TimestampType::operator=(TimestampType&& other55) noexcept {
-  isAdjustedToUTC = other55.isAdjustedToUTC;
-  unit = std::move(other55.unit);
+TimestampType& TimestampType::operator=(TimestampType&& other59) noexcept {
+  isAdjustedToUTC = other59.isAdjustedToUTC;
+  unit = std::move(other59.unit);
   return *this;
 }
 void TimestampType::printTo(std::ostream& out) const {
@@ -2084,22 +2159,22 @@ void swap(TimeType &a, TimeType &b) {
   swap(a.unit, b.unit);
 }
 
-TimeType::TimeType(const TimeType& other56) noexcept {
-  isAdjustedToUTC = other56.isAdjustedToUTC;
-  unit = other56.unit;
+TimeType::TimeType(const TimeType& other60) noexcept {
+  isAdjustedToUTC = other60.isAdjustedToUTC;
+  unit = other60.unit;
 }
-TimeType::TimeType(TimeType&& other57) noexcept {
-  isAdjustedToUTC = other57.isAdjustedToUTC;
-  unit = std::move(other57.unit);
+TimeType::TimeType(TimeType&& other61) noexcept {
+  isAdjustedToUTC = other61.isAdjustedToUTC;
+  unit = std::move(other61.unit);
 }
-TimeType& TimeType::operator=(const TimeType& other58) noexcept {
-  isAdjustedToUTC = other58.isAdjustedToUTC;
-  unit = other58.unit;
+TimeType& TimeType::operator=(const TimeType& other62) noexcept {
+  isAdjustedToUTC = other62.isAdjustedToUTC;
+  unit = other62.unit;
   return *this;
 }
-TimeType& TimeType::operator=(TimeType&& other59) noexcept {
-  isAdjustedToUTC = other59.isAdjustedToUTC;
-  unit = std::move(other59.unit);
+TimeType& TimeType::operator=(TimeType&& other63) noexcept {
+  isAdjustedToUTC = other63.isAdjustedToUTC;
+  unit = std::move(other63.unit);
   return *this;
 }
 void TimeType::printTo(std::ostream& out) const {
@@ -2208,22 +2283,22 @@ void swap(IntType &a, IntType &b) {
   swap(a.isSigned, b.isSigned);
 }
 
-IntType::IntType(const IntType& other60) noexcept {
-  bitWidth = other60.bitWidth;
-  isSigned = other60.isSigned;
+IntType::IntType(const IntType& other64) noexcept {
+  bitWidth = other64.bitWidth;
+  isSigned = other64.isSigned;
 }
-IntType::IntType(IntType&& other61) noexcept {
-  bitWidth = other61.bitWidth;
-  isSigned = other61.isSigned;
+IntType::IntType(IntType&& other65) noexcept {
+  bitWidth = other65.bitWidth;
+  isSigned = other65.isSigned;
 }
-IntType& IntType::operator=(const IntType& other62) noexcept {
-  bitWidth = other62.bitWidth;
-  isSigned = other62.isSigned;
+IntType& IntType::operator=(const IntType& other66) noexcept {
+  bitWidth = other66.bitWidth;
+  isSigned = other66.isSigned;
   return *this;
 }
-IntType& IntType::operator=(IntType&& other63) noexcept {
-  bitWidth = other63.bitWidth;
-  isSigned = other63.isSigned;
+IntType& IntType::operator=(IntType&& other67) noexcept {
+  bitWidth = other67.bitWidth;
+  isSigned = other67.isSigned;
   return *this;
 }
 void IntType::printTo(std::ostream& out) const {
@@ -2289,18 +2364,18 @@ void swap(JsonType &a, JsonType &b) {
   (void) b;
 }
 
-JsonType::JsonType(const JsonType& other64) noexcept {
-  (void) other64;
+JsonType::JsonType(const JsonType& other68) noexcept {
+  (void) other68;
 }
-JsonType::JsonType(JsonType&& other65) noexcept {
-  (void) other65;
+JsonType::JsonType(JsonType&& other69) noexcept {
+  (void) other69;
 }
-JsonType& JsonType::operator=(const JsonType& other66) noexcept {
-  (void) other66;
+JsonType& JsonType::operator=(const JsonType& other70) noexcept {
+  (void) other70;
   return *this;
 }
-JsonType& JsonType::operator=(JsonType&& other67) noexcept {
-  (void) other67;
+JsonType& JsonType::operator=(JsonType&& other71) noexcept {
+  (void) other71;
   return *this;
 }
 void JsonType::printTo(std::ostream& out) const {
@@ -2364,18 +2439,18 @@ void swap(BsonType &a, BsonType &b) {
   (void) b;
 }
 
-BsonType::BsonType(const BsonType& other68) noexcept {
-  (void) other68;
+BsonType::BsonType(const BsonType& other72) noexcept {
+  (void) other72;
 }
-BsonType::BsonType(BsonType&& other69) noexcept {
-  (void) other69;
+BsonType::BsonType(BsonType&& other73) noexcept {
+  (void) other73;
 }
-BsonType& BsonType::operator=(const BsonType& other70) noexcept {
-  (void) other70;
+BsonType& BsonType::operator=(const BsonType& other74) noexcept {
+  (void) other74;
   return *this;
 }
-BsonType& BsonType::operator=(BsonType&& other71) noexcept {
-  (void) other71;
+BsonType& BsonType::operator=(BsonType&& other75) noexcept {
+  (void) other75;
   return *this;
 }
 void BsonType::printTo(std::ostream& out) const {
@@ -2453,6 +2528,11 @@ void LogicalType::__set_UUID(const UUIDType& val) {
   this->UUID = val;
 __isset.UUID = true;
 }
+
+void LogicalType::__set_FLOAT16(const Float16Type& val) {
+  this->FLOAT16 = val;
+__isset.FLOAT16 = true;
+}
 std::ostream& operator<<(std::ostream& out, const LogicalType& obj)
 {
   obj.printTo(out);
@@ -2585,6 +2665,14 @@ uint32_t LogicalType::read(::apache::thrift::protocol::TProtocol* iprot) {
           xfer += iprot->skip(ftype);
         }
         break;
+      case 15:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->FLOAT16.read(iprot);
+          this->__isset.FLOAT16 = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
       default:
         xfer += iprot->skip(ftype);
         break;
@@ -2667,6 +2755,11 @@ uint32_t LogicalType::write(::apache::thrift::protocol::TProtocol* oprot) const
     xfer += this->UUID.write(oprot);
     xfer += oprot->writeFieldEnd();
   }
+  if (this->__isset.FLOAT16) {
+    xfer += oprot->writeFieldBegin("FLOAT16", ::apache::thrift::protocol::T_STRUCT, 15);
+    xfer += this->FLOAT16.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
   xfer += oprot->writeFieldStop();
   xfer += oprot->writeStructEnd();
   return xfer;
@@ -2687,73 +2780,78 @@ void swap(LogicalType &a, LogicalType &b) {
   swap(a.JSON, b.JSON);
   swap(a.BSON, b.BSON);
   swap(a.UUID, b.UUID);
+  swap(a.FLOAT16, b.FLOAT16);
   swap(a.__isset, b.__isset);
 }
 
-LogicalType::LogicalType(const LogicalType& other72) noexcept {
-  STRING = other72.STRING;
-  MAP = other72.MAP;
-  LIST = other72.LIST;
-  ENUM = other72.ENUM;
-  DECIMAL = other72.DECIMAL;
-  DATE = other72.DATE;
-  TIME = other72.TIME;
-  TIMESTAMP = other72.TIMESTAMP;
-  INTEGER = other72.INTEGER;
-  UNKNOWN = other72.UNKNOWN;
-  JSON = other72.JSON;
-  BSON = other72.BSON;
-  UUID = other72.UUID;
-  __isset = other72.__isset;
-}
-LogicalType::LogicalType(LogicalType&& other73) noexcept {
-  STRING = std::move(other73.STRING);
-  MAP = std::move(other73.MAP);
-  LIST = std::move(other73.LIST);
-  ENUM = std::move(other73.ENUM);
-  DECIMAL = std::move(other73.DECIMAL);
-  DATE = std::move(other73.DATE);
-  TIME = std::move(other73.TIME);
-  TIMESTAMP = std::move(other73.TIMESTAMP);
-  INTEGER = std::move(other73.INTEGER);
-  UNKNOWN = std::move(other73.UNKNOWN);
-  JSON = std::move(other73.JSON);
-  BSON = std::move(other73.BSON);
-  UUID = std::move(other73.UUID);
-  __isset = other73.__isset;
-}
-LogicalType& LogicalType::operator=(const LogicalType& other74) noexcept {
-  STRING = other74.STRING;
-  MAP = other74.MAP;
-  LIST = other74.LIST;
-  ENUM = other74.ENUM;
-  DECIMAL = other74.DECIMAL;
-  DATE = other74.DATE;
-  TIME = other74.TIME;
-  TIMESTAMP = other74.TIMESTAMP;
-  INTEGER = other74.INTEGER;
-  UNKNOWN = other74.UNKNOWN;
-  JSON = other74.JSON;
-  BSON = other74.BSON;
-  UUID = other74.UUID;
-  __isset = other74.__isset;
+LogicalType::LogicalType(const LogicalType& other76) noexcept {
+  STRING = other76.STRING;
+  MAP = other76.MAP;
+  LIST = other76.LIST;
+  ENUM = other76.ENUM;
+  DECIMAL = other76.DECIMAL;
+  DATE = other76.DATE;
+  TIME = other76.TIME;
+  TIMESTAMP = other76.TIMESTAMP;
+  INTEGER = other76.INTEGER;
+  UNKNOWN = other76.UNKNOWN;
+  JSON = other76.JSON;
+  BSON = other76.BSON;
+  UUID = other76.UUID;
+  FLOAT16 = other76.FLOAT16;
+  __isset = other76.__isset;
+}
+LogicalType::LogicalType(LogicalType&& other77) noexcept {
+  STRING = std::move(other77.STRING);
+  MAP = std::move(other77.MAP);
+  LIST = std::move(other77.LIST);
+  ENUM = std::move(other77.ENUM);
+  DECIMAL = std::move(other77.DECIMAL);
+  DATE = std::move(other77.DATE);
+  TIME = std::move(other77.TIME);
+  TIMESTAMP = std::move(other77.TIMESTAMP);
+  INTEGER = std::move(other77.INTEGER);
+  UNKNOWN = std::move(other77.UNKNOWN);
+  JSON = std::move(other77.JSON);
+  BSON = std::move(other77.BSON);
+  UUID = std::move(other77.UUID);
+  FLOAT16 = std::move(other77.FLOAT16);
+  __isset = other77.__isset;
+}
+LogicalType& LogicalType::operator=(const LogicalType& other78) noexcept {
+  STRING = other78.STRING;
+  MAP = other78.MAP;
+  LIST = other78.LIST;
+  ENUM = other78.ENUM;
+  DECIMAL = other78.DECIMAL;
+  DATE = other78.DATE;
+  TIME = other78.TIME;
+  TIMESTAMP = other78.TIMESTAMP;
+  INTEGER = other78.INTEGER;
+  UNKNOWN = other78.UNKNOWN;
+  JSON = other78.JSON;
+  BSON = other78.BSON;
+  UUID = other78.UUID;
+  FLOAT16 = other78.FLOAT16;
+  __isset = other78.__isset;
   return *this;
 }
-LogicalType& LogicalType::operator=(LogicalType&& other75) noexcept {
-  STRING = std::move(other75.STRING);
-  MAP = std::move(other75.MAP);
-  LIST = std::move(other75.LIST);
-  ENUM = std::move(other75.ENUM);
-  DECIMAL = std::move(other75.DECIMAL);
-  DATE = std::move(other75.DATE);
-  TIME = std::move(other75.TIME);
-  TIMESTAMP = std::move(other75.TIMESTAMP);
-  INTEGER = std::move(other75.INTEGER);
-  UNKNOWN = std::move(other75.UNKNOWN);
-  JSON = std::move(other75.JSON);
-  BSON = std::move(other75.BSON);
-  UUID = std::move(other75.UUID);
-  __isset = other75.__isset;
+LogicalType& LogicalType::operator=(LogicalType&& other79) noexcept {
+  STRING = std::move(other79.STRING);
+  MAP = std::move(other79.MAP);
+  LIST = std::move(other79.LIST);
+  ENUM = std::move(other79.ENUM);
+  DECIMAL = std::move(other79.DECIMAL);
+  DATE = std::move(other79.DATE);
+  TIME = std::move(other79.TIME);
+  TIMESTAMP = std::move(other79.TIMESTAMP);
+  INTEGER = std::move(other79.INTEGER);
+  UNKNOWN = std::move(other79.UNKNOWN);
+  JSON = std::move(other79.JSON);
+  BSON = std::move(other79.BSON);
+  UUID = std::move(other79.UUID);
+  FLOAT16 = std::move(other79.FLOAT16);
+  __isset = other79.__isset;
   return *this;
 }
 void LogicalType::printTo(std::ostream& out) const {
@@ -2772,6 +2870,7 @@ void LogicalType::printTo(std::ostream& out) const {
   out << ", " << "JSON="; (__isset.JSON ? (out << to_string(JSON)) : (out << "<null>"));
   out << ", " << "BSON="; (__isset.BSON ? (out << to_string(BSON)) : (out << "<null>"));
   out << ", " << "UUID="; (__isset.UUID ? (out << to_string(UUID)) : (out << "<null>"));
+  out << ", " << "FLOAT16="; (__isset.FLOAT16 ? (out << to_string(FLOAT16)) : (out << "<null>"));
   out << ")";
 }
 
@@ -2859,9 +2958,9 @@ uint32_t SchemaElement::read(::apache::thrift::protocol::TProtocol* iprot) {
     {
       case 1:
         if (ftype == ::apache::thrift::protocol::T_I32) {
-          int32_t ecast76;
-          xfer += iprot->readI32(ecast76);
-          this->type = static_cast<Type::type>(ecast76);
+          int32_t ecast80;
+          xfer += iprot->readI32(ecast80);
+          this->type = static_cast<Type::type>(ecast80);
           this->__isset.type = true;
         } else {
           xfer += iprot->skip(ftype);
@@ -2877,9 +2976,9 @@ uint32_t SchemaElement::read(::apache::thrift::protocol::TProtocol* iprot) {
         break;
       case 3:
         if (ftype == ::apache::thrift::protocol::T_I32) {
-          int32_t ecast77;
-          xfer += iprot->readI32(ecast77);
-          this->repetition_type = static_cast<FieldRepetitionType::type>(ecast77);
+          int32_t ecast81;
+          xfer += iprot->readI32(ecast81);
+          this->repetition_type = static_cast<FieldRepetitionType::type>(ecast81);
           this->__isset.repetition_type = true;
         } else {
           xfer += iprot->skip(ftype);
@@ -2903,9 +3002,9 @@ uint32_t SchemaElement::read(::apache::thrift::protocol::TProtocol* iprot) {
         break;
       case 6:
         if (ftype == ::apache::thrift::protocol::T_I32) {
-          int32_t ecast78;
-          xfer += iprot->readI32(ecast78);
-          this->converted_type = static_cast<ConvertedType::type>(ecast78);
+          int32_t ecast82;
+          xfer += iprot->readI32(ecast82);
+          this->converted_type = static_cast<ConvertedType::type>(ecast82);
           this->__isset.converted_type = true;
         } else {
           xfer += iprot->skip(ftype);
@@ -3031,58 +3130,58 @@ void swap(SchemaElement &a, SchemaElement &b) {
   swap(a.__isset, b.__isset);
 }
 
-SchemaElement::SchemaElement(const SchemaElement& other79) {
-  type = other79.type;
-  type_length = other79.type_length;
-  repetition_type = other79.repetition_type;
-  name = other79.name;
-  num_children = other79.num_children;
-  converted_type = other79.converted_type;
-  scale = other79.scale;
-  precision = other79.precision;
-  field_id = other79.field_id;
-  logicalType = other79.logicalType;
-  __isset = other79.__isset;
-}
-SchemaElement::SchemaElement(SchemaElement&& other80) noexcept {
-  type = other80.type;
-  type_length = other80.type_length;
-  repetition_type = other80.repetition_type;
-  name = std::move(other80.name);
-  num_children = other80.num_children;
-  converted_type = other80.converted_type;
-  scale = other80.scale;
-  precision = other80.precision;
-  field_id = other80.field_id;
-  logicalType = std::move(other80.logicalType);
-  __isset = other80.__isset;
-}
-SchemaElement& SchemaElement::operator=(const SchemaElement& other81) {
-  type = other81.type;
-  type_length = other81.type_length;
-  repetition_type = other81.repetition_type;
-  name = other81.name;
-  num_children = other81.num_children;
-  converted_type = other81.converted_type;
-  scale = other81.scale;
-  precision = other81.precision;
-  field_id = other81.field_id;
-  logicalType = other81.logicalType;
-  __isset = other81.__isset;
+SchemaElement::SchemaElement(const SchemaElement& other83) {
+  type = other83.type;
+  type_length = other83.type_length;
+  repetition_type = other83.repetition_type;
+  name = other83.name;
+  num_children = other83.num_children;
+  converted_type = other83.converted_type;
+  scale = other83.scale;
+  precision = other83.precision;
+  field_id = other83.field_id;
+  logicalType = other83.logicalType;
+  __isset = other83.__isset;
+}
+SchemaElement::SchemaElement(SchemaElement&& other84) noexcept {
+  type = other84.type;
+  type_length = other84.type_length;
+  repetition_type = other84.repetition_type;
+  name = std::move(other84.name);
+  num_children = other84.num_children;
+  converted_type = other84.converted_type;
+  scale = other84.scale;
+  precision = other84.precision;
+  field_id = other84.field_id;
+  logicalType = std::move(other84.logicalType);
+  __isset = other84.__isset;
+}
+SchemaElement& SchemaElement::operator=(const SchemaElement& other85) {
+  type = other85.type;
+  type_length = other85.type_length;
+  repetition_type = other85.repetition_type;
+  name = other85.name;
+  num_children = other85.num_children;
+  converted_type = other85.converted_type;
+  scale = other85.scale;
+  precision = other85.precision;
+  field_id = other85.field_id;
+  logicalType = other85.logicalType;
+  __isset = other85.__isset;
   return *this;
 }
-SchemaElement& SchemaElement::operator=(SchemaElement&& other82) noexcept {
-  type = other82.type;
-  type_length = other82.type_length;
-  repetition_type = other82.repetition_type;
-  name = std::move(other82.name);
-  num_children = other82.num_children;
-  converted_type = other82.converted_type;
-  scale = other82.scale;
-  precision = other82.precision;
-  field_id = other82.field_id;
-  logicalType = std::move(other82.logicalType);
-  __isset = other82.__isset;
+SchemaElement& SchemaElement::operator=(SchemaElement&& other86) noexcept {
+  type = other86.type;
+  type_length = other86.type_length;
+  repetition_type = other86.repetition_type;
+  name = std::move(other86.name);
+  num_children = other86.num_children;
+  converted_type = other86.converted_type;
+  scale = other86.scale;
+  precision = other86.precision;
+  field_id = other86.field_id;
+  logicalType = std::move(other86.logicalType);
+  __isset = other86.__isset;
   return *this;
 }
 void SchemaElement::printTo(std::ostream& out) const {
@@ -3168,9 +3267,9 @@ uint32_t DataPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) {
         break;
       case 2:
         if (ftype == ::apache::thrift::protocol::T_I32) {
-          int32_t ecast83;
-          xfer += iprot->readI32(ecast83);
-          this->encoding = static_cast<Encoding::type>(ecast83);
+          int32_t ecast87;
+          xfer += iprot->readI32(ecast87);
+          this->encoding = static_cast<Encoding::type>(ecast87);
           isset_encoding = true;
         } else {
           xfer += iprot->skip(ftype);
@@ -3178,9 +3277,9 @@ uint32_t DataPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) {
         break;
       case 3:
         if (ftype == ::apache::thrift::protocol::T_I32) {
-          int32_t ecast84;
-          xfer += iprot->readI32(ecast84);
-          this->definition_level_encoding = static_cast<Encoding::type>(ecast84);
+          int32_t ecast88;
+          xfer += iprot->readI32(ecast88);
+          this->definition_level_encoding = static_cast<Encoding::type>(ecast88);
           isset_definition_level_encoding = true;
         } else {
           xfer += iprot->skip(ftype);
@@ -3188,9 +3287,9 @@ uint32_t DataPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) {
         break;
       case 4:
         if (ftype == ::apache::thrift::protocol::T_I32) {
-          int32_t ecast85;
-          xfer += iprot->readI32(ecast85);
-          this->repetition_level_encoding = static_cast<Encoding::type>(ecast85);
+          int32_t ecast89;
+          xfer += iprot->readI32(ecast89);
+          this->repetition_level_encoding = static_cast<Encoding::type>(ecast89);
           isset_repetition_level_encoding = true;
         } else {
           xfer += iprot->skip(ftype);
@@ -3265,38 +3364,38 @@ void swap(DataPageHeader &a, DataPageHeader &b) {
   swap(a.__isset, b.__isset);
 }
 
-DataPageHeader::DataPageHeader(const DataPageHeader& other86) {
-  num_values = other86.num_values;
-  encoding = other86.encoding;
-  definition_level_encoding = other86.definition_level_encoding;
-  repetition_level_encoding = other86.repetition_level_encoding;
-  statistics = other86.statistics;
-  __isset = other86.__isset;
-}
-DataPageHeader::DataPageHeader(DataPageHeader&& other87) noexcept {
-  num_values = other87.num_values;
-  encoding = other87.encoding;
-  definition_level_encoding = other87.definition_level_encoding;
-  repetition_level_encoding = other87.repetition_level_encoding;
-  statistics = std::move(other87.statistics);
-  __isset = other87.__isset;
-}
-DataPageHeader& DataPageHeader::operator=(const DataPageHeader& other88) {
-  num_values = other88.num_values;
-  encoding = other88.encoding;
-  definition_level_encoding = other88.definition_level_encoding;
-  repetition_level_encoding = other88.repetition_level_encoding;
-  statistics = other88.statistics;
-  __isset = other88.__isset;
+DataPageHeader::DataPageHeader(const DataPageHeader& other90) {
+  num_values = other90.num_values;
+  encoding = other90.encoding;
+  definition_level_encoding = other90.definition_level_encoding;
+  repetition_level_encoding = other90.repetition_level_encoding;
+  statistics = other90.statistics;
+  __isset = other90.__isset;
+}
+DataPageHeader::DataPageHeader(DataPageHeader&& other91) noexcept {
+  num_values = other91.num_values;
+  encoding = other91.encoding;
+  definition_level_encoding = other91.definition_level_encoding;
+  repetition_level_encoding = other91.repetition_level_encoding;
+  statistics = std::move(other91.statistics);
+  __isset = other91.__isset;
+}
+DataPageHeader& DataPageHeader::operator=(const DataPageHeader& other92) {
+  num_values = other92.num_values;
+  encoding = other92.encoding;
+  definition_level_encoding = other92.definition_level_encoding;
+  repetition_level_encoding = other92.repetition_level_encoding;
+  statistics = other92.statistics;
+  __isset = other92.__isset;
   return *this;
 }
-DataPageHeader& DataPageHeader::operator=(DataPageHeader&& other89) noexcept {
-  num_values = other89.num_values;
-  encoding = other89.encoding;
-  definition_level_encoding = other89.definition_level_encoding;
-  repetition_level_encoding = other89.repetition_level_encoding;
-  statistics = std::move(other89.statistics);
-  __isset = other89.__isset;
+DataPageHeader& DataPageHeader::operator=(DataPageHeader&& other93) noexcept {
+  num_values = other93.num_values;
+  encoding = other93.encoding;
+  definition_level_encoding = other93.definition_level_encoding;
+  repetition_level_encoding = other93.repetition_level_encoding;
+  statistics = std::move(other93.statistics);
+  __isset = other93.__isset;
   return *this;
 }
 void DataPageHeader::printTo(std::ostream& out) const {
@@ -3365,18 +3464,18 @@ void swap(IndexPageHeader &a, IndexPageHeader &b) {
   (void) b;
 }
 
-IndexPageHeader::IndexPageHeader(const IndexPageHeader& other90) noexcept {
-  (void) other90;
+IndexPageHeader::IndexPageHeader(const IndexPageHeader& other94) noexcept {
+  (void) other94;
 }
-IndexPageHeader::IndexPageHeader(IndexPageHeader&& other91) noexcept {
-  (void) other91;
+IndexPageHeader::IndexPageHeader(IndexPageHeader&& other95) noexcept {
+  (void) other95;
 }
-IndexPageHeader& IndexPageHeader::operator=(const IndexPageHeader& other92) noexcept {
-  (void) other92;
+IndexPageHeader& IndexPageHeader::operator=(const IndexPageHeader& other96) noexcept {
+  (void) other96;
   return *this;
 }
-IndexPageHeader& IndexPageHeader::operator=(IndexPageHeader&& other93) noexcept {
-  (void) other93;
+IndexPageHeader& IndexPageHeader::operator=(IndexPageHeader&& other97) noexcept {
+  (void) other97;
   return *this;
 }
 void IndexPageHeader::printTo(std::ostream& out) const {
@@ -3442,9 +3541,9 @@ uint32_t DictionaryPageHeader::read(::apache::thrift::protocol::TProtocol* iprot
         break;
       case 2:
         if (ftype == ::apache::thrift::protocol::T_I32) {
-          int32_t ecast94;
-          xfer += iprot->readI32(ecast94);
-          this->encoding = static_cast<Encoding::type>(ecast94);
+          int32_t ecast98;
+          xfer += iprot->readI32(ecast98);
+          this->encoding = static_cast<Encoding::type>(ecast98);
           isset_encoding = true;
         } else {
           xfer += iprot->skip(ftype);
@@ -3505,30 +3604,30 @@ void swap(DictionaryPageHeader &a, DictionaryPageHeader &b) {
   swap(a.__isset, b.__isset);
 }
 
-DictionaryPageHeader::DictionaryPageHeader(const DictionaryPageHeader& other95) noexcept {
-  num_values = other95.num_values;
-  encoding = other95.encoding;
-  is_sorted = other95.is_sorted;
-  __isset = other95.__isset;
+DictionaryPageHeader::DictionaryPageHeader(const DictionaryPageHeader& other99) noexcept {
+  num_values = other99.num_values;
+  encoding = other99.encoding;
+  is_sorted = other99.is_sorted;
+  __isset = other99.__isset;
 }
-DictionaryPageHeader::DictionaryPageHeader(DictionaryPageHeader&& other96) noexcept {
-  num_values = other96.num_values;
-  encoding = other96.encoding;
-  is_sorted = other96.is_sorted;
-  __isset = other96.__isset;
+DictionaryPageHeader::DictionaryPageHeader(DictionaryPageHeader&& other100) noexcept {
+  num_values = other100.num_values;
+  encoding = other100.encoding;
+  is_sorted = other100.is_sorted;
+  __isset = other100.__isset;
 }
-DictionaryPageHeader& DictionaryPageHeader::operator=(const DictionaryPageHeader& other97) noexcept {
-  num_values = other97.num_values;
-  encoding = other97.encoding;
-  is_sorted = other97.is_sorted;
-  __isset = other97.__isset;
+DictionaryPageHeader& DictionaryPageHeader::operator=(const DictionaryPageHeader& other101) noexcept {
+  num_values = other101.num_values;
+  encoding = other101.encoding;
+  is_sorted = other101.is_sorted;
+  __isset = other101.__isset;
   return *this;
 }
-DictionaryPageHeader& DictionaryPageHeader::operator=(DictionaryPageHeader&& other98) noexcept {
-  num_values = other98.num_values;
-  encoding = other98.encoding;
-  is_sorted = other98.is_sorted;
-  __isset = other98.__isset;
+DictionaryPageHeader& DictionaryPageHeader::operator=(DictionaryPageHeader&& other102) noexcept {
+  num_values = other102.num_values;
+  encoding = other102.encoding;
+  is_sorted = other102.is_sorted;
+  __isset = other102.__isset;
   return *this;
 }
 void DictionaryPageHeader::printTo(std::ostream& out) const {
@@ -3638,9 +3737,9 @@ uint32_t DataPageHeaderV2::read(::apache::thrift::protocol::TProtocol* iprot) {
         break;
       case 4:
         if (ftype == ::apache::thrift::protocol::T_I32) {
-          int32_t ecast99;
-          xfer += iprot->readI32(ecast99);
-          this->encoding = static_cast<Encoding::type>(ecast99);
+          int32_t ecast103;
+          xfer += iprot->readI32(ecast103);
+          this->encoding = static_cast<Encoding::type>(ecast103);
           isset_encoding = true;
         } else {
           xfer += iprot->skip(ftype);
@@ -3759,50 +3858,50 @@ void swap(DataPageHeaderV2 &a, DataPageHeaderV2 &b) {
   swap(a.__isset, b.__isset);
 }
 
-DataPageHeaderV2::DataPageHeaderV2(const DataPageHeaderV2& other100) {
-  num_values = other100.num_values;
-  num_nulls = other100.num_nulls;
-  num_rows = other100.num_rows;
-  encoding = other100.encoding;
-  definition_levels_byte_length = other100.definition_levels_byte_length;
-  repetition_levels_byte_length = other100.repetition_levels_byte_length;
-  is_compressed = other100.is_compressed;
-  statistics = other100.statistics;
-  __isset = other100.__isset;
-}
-DataPageHeaderV2::DataPageHeaderV2(DataPageHeaderV2&& other101) noexcept {
-  num_values = other101.num_values;
-  num_nulls = other101.num_nulls;
-  num_rows = other101.num_rows;
-  encoding = other101.encoding;
-  definition_levels_byte_length = other101.definition_levels_byte_length;
-  repetition_levels_byte_length = other101.repetition_levels_byte_length;
-  is_compressed = other101.is_compressed;
-  statistics = std::move(other101.statistics);
-  __isset = other101.__isset;
-}
-DataPageHeaderV2& DataPageHeaderV2::operator=(const DataPageHeaderV2& other102) {
-  num_values = other102.num_values;
-  num_nulls = other102.num_nulls;
-  num_rows = other102.num_rows;
-  encoding = other102.encoding;
-  definition_levels_byte_length = other102.definition_levels_byte_length;
-  repetition_levels_byte_length = other102.repetition_levels_byte_length;
-  is_compressed = other102.is_compressed;
-  statistics = other102.statistics;
-  __isset = other102.__isset;
+DataPageHeaderV2::DataPageHeaderV2(const DataPageHeaderV2& other104) {
+  num_values = other104.num_values;
+  num_nulls = other104.num_nulls;
+  num_rows = other104.num_rows;
+  encoding = other104.encoding;
+  definition_levels_byte_length = other104.definition_levels_byte_length;
+  repetition_levels_byte_length = other104.repetition_levels_byte_length;
+  is_compressed = other104.is_compressed;
+  statistics = other104.statistics;
+  __isset = other104.__isset;
+}
+DataPageHeaderV2::DataPageHeaderV2(DataPageHeaderV2&& other105) noexcept {
+  num_values = other105.num_values;
+  num_nulls = other105.num_nulls;
+  num_rows = other105.num_rows;
+  encoding = other105.encoding;
+  definition_levels_byte_length = other105.definition_levels_byte_length;
+  repetition_levels_byte_length = other105.repetition_levels_byte_length;
+  is_compressed = other105.is_compressed;
+  statistics = std::move(other105.statistics);
+  __isset = other105.__isset;
+}
+DataPageHeaderV2& DataPageHeaderV2::operator=(const DataPageHeaderV2& other106) {
+  num_values = other106.num_values;
+  num_nulls = other106.num_nulls;
+  num_rows = other106.num_rows;
+  encoding = other106.encoding;
+  definition_levels_byte_length = other106.definition_levels_byte_length;
+  repetition_levels_byte_length = other106.repetition_levels_byte_length;
+  is_compressed = other106.is_compressed;
+  statistics = other106.statistics;
+  __isset = other106.__isset;
   return *this;
 }
-DataPageHeaderV2& DataPageHeaderV2::operator=(DataPageHeaderV2&& other103) noexcept {
-  num_values = other103.num_values;
-  num_nulls = other103.num_nulls;
-  num_rows = other103.num_rows;
-  encoding = other103.encoding;
-  definition_levels_byte_length = other103.definition_levels_byte_length;
-  repetition_levels_byte_length = other103.repetition_levels_byte_length;
-  is_compressed = other103.is_compressed;
-  statistics = std::move(other103.statistics);
-  __isset = other103.__isset;
+DataPageHeaderV2& DataPageHeaderV2::operator=(DataPageHeaderV2&& other107) noexcept {
+  num_values = other107.num_values;
+  num_nulls = other107.num_nulls;
+  num_rows = other107.num_rows;
+  encoding = other107.encoding;
+  definition_levels_byte_length = other107.definition_levels_byte_length;
+  repetition_levels_byte_length = other107.repetition_levels_byte_length;
+  is_compressed = other107.is_compressed;
+  statistics = std::move(other107.statistics);
+  __isset = other107.__isset;
   return *this;
 }
 void DataPageHeaderV2::printTo(std::ostream& out) const {
@@ -3874,18 +3973,18 @@ void swap(SplitBlockAlgorithm &a, SplitBlockAlgorithm &b) {
   (void) b;
 }
 
-SplitBlockAlgorithm::SplitBlockAlgorithm(const SplitBlockAlgorithm& other104) noexcept {
-  (void) other104;
+SplitBlockAlgorithm::SplitBlockAlgorithm(const SplitBlockAlgorithm& other108) noexcept {
+  (void) other108;
 }
-SplitBlockAlgorithm::SplitBlockAlgorithm(SplitBlockAlgorithm&& other105) noexcept {
-  (void) other105;
+SplitBlockAlgorithm::SplitBlockAlgorithm(SplitBlockAlgorithm&& other109) noexcept {
+  (void) other109;
 }
-SplitBlockAlgorithm& SplitBlockAlgorithm::operator=(const SplitBlockAlgorithm& other106) noexcept {
-  (void) other106;
+SplitBlockAlgorithm& SplitBlockAlgorithm::operator=(const SplitBlockAlgorithm& other110) noexcept {
+  (void) other110;
   return *this;
 }
-SplitBlockAlgorithm& SplitBlockAlgorithm::operator=(SplitBlockAlgorithm&& other107) noexcept {
-  (void) other107;
+SplitBlockAlgorithm& SplitBlockAlgorithm::operator=(SplitBlockAlgorithm&& other111) noexcept {
+  (void) other111;
   return *this;
 }
 void SplitBlockAlgorithm::printTo(std::ostream& out) const {
@@ -3972,22 +4071,22 @@ void swap(BloomFilterAlgorithm &a, BloomFilterAlgorithm &b) {
   swap(a.__isset, b.__isset);
 }
 
-BloomFilterAlgorithm::BloomFilterAlgorithm(const BloomFilterAlgorithm& other108) noexcept {
-  BLOCK = other108.BLOCK;
-  __isset = other108.__isset;
+BloomFilterAlgorithm::BloomFilterAlgorithm(const BloomFilterAlgorithm& other112) noexcept {
+  BLOCK = other112.BLOCK;
+  __isset = other112.__isset;
 }
-BloomFilterAlgorithm::BloomFilterAlgorithm(BloomFilterAlgorithm&& other109) noexcept {
-  BLOCK = std::move(other109.BLOCK);
-  __isset = other109.__isset;
+BloomFilterAlgorithm::BloomFilterAlgorithm(BloomFilterAlgorithm&& other113) noexcept {
+  BLOCK = std::move(other113.BLOCK);
+  __isset = other113.__isset;
 }
-BloomFilterAlgorithm& BloomFilterAlgorithm::operator=(const BloomFilterAlgorithm& other110) noexcept {
-  BLOCK = other110.BLOCK;
-  __isset = other110.__isset;
+BloomFilterAlgorithm& BloomFilterAlgorithm::operator=(const BloomFilterAlgorithm& other114) noexcept {
+  BLOCK = other114.BLOCK;
+  __isset = other114.__isset;
   return *this;
 }
-BloomFilterAlgorithm& BloomFilterAlgorithm::operator=(BloomFilterAlgorithm&& other111) noexcept {
-  BLOCK = std::move(other111.BLOCK);
-  __isset = other111.__isset;
+BloomFilterAlgorithm& BloomFilterAlgorithm::operator=(BloomFilterAlgorithm&& other115) noexcept {
+  BLOCK = std::move(other115.BLOCK);
+  __isset = other115.__isset;
   return *this;
 }
 void BloomFilterAlgorithm::printTo(std::ostream& out) const {
@@ -4052,18 +4151,18 @@ void swap(XxHash &a, XxHash &b) {
   (void) b;
 }
 
-XxHash::XxHash(const XxHash& other112) noexcept {
-  (void) other112;
+XxHash::XxHash(const XxHash& other116) noexcept {
+  (void) other116;
 }
-XxHash::XxHash(XxHash&& other113) noexcept {
-  (void) other113;
+XxHash::XxHash(XxHash&& other117) noexcept {
+  (void) other117;
 }
-XxHash& XxHash::operator=(const XxHash& other114) noexcept {
-  (void) other114;
+XxHash& XxHash::operator=(const XxHash& other118) noexcept {
+  (void) other118;
   return *this;
 }
-XxHash& XxHash::operator=(XxHash&& other115) noexcept {
-  (void) other115;
+XxHash& XxHash::operator=(XxHash&& other119) noexcept {
+  (void) other119;
   return *this;
 }
 void XxHash::printTo(std::ostream& out) const {
@@ -4150,22 +4249,22 @@ void swap(BloomFilterHash &a, BloomFilterHash &b) {
   swap(a.__isset, b.__isset);
 }
 
-BloomFilterHash::BloomFilterHash(const BloomFilterHash& other116) noexcept {
-  XXHASH = other116.XXHASH;
-  __isset = other116.__isset;
+BloomFilterHash::BloomFilterHash(const BloomFilterHash& other120) noexcept {
+  XXHASH = other120.XXHASH;
+  __isset = other120.__isset;
 }
-BloomFilterHash::BloomFilterHash(BloomFilterHash&& other117) noexcept {
-  XXHASH = std::move(other117.XXHASH);
-  __isset = other117.__isset;
+BloomFilterHash::BloomFilterHash(BloomFilterHash&& other121) noexcept {
+  XXHASH = std::move(other121.XXHASH);
+  __isset = other121.__isset;
 }
-BloomFilterHash& BloomFilterHash::operator=(const BloomFilterHash& other118) noexcept {
-  XXHASH = other118.XXHASH;
-  __isset = other118.__isset;
+BloomFilterHash& BloomFilterHash::operator=(const BloomFilterHash& other122) noexcept {
+  XXHASH = other122.XXHASH;
+  __isset = other122.__isset;
   return *this;
 }
-BloomFilterHash& BloomFilterHash::operator=(BloomFilterHash&& other119) noexcept {
-  XXHASH = std::move(other119.XXHASH);
-  __isset = other119.__isset;
+BloomFilterHash& BloomFilterHash::operator=(BloomFilterHash&& other123) noexcept {
+  XXHASH = std::move(other123.XXHASH);
+  __isset = other123.__isset;
   return *this;
 }
 void BloomFilterHash::printTo(std::ostream& out) const {
@@ -4230,18 +4329,18 @@ void swap(Uncompressed &a, Uncompressed &b) {
   (void) b;
 }
 
-Uncompressed::Uncompressed(const Uncompressed& other120) noexcept {
-  (void) other120;
+Uncompressed::Uncompressed(const Uncompressed& other124) noexcept {
+  (void) other124;
 }
-Uncompressed::Uncompressed(Uncompressed&& other121) noexcept {
-  (void) other121;
+Uncompressed::Uncompressed(Uncompressed&& other125) noexcept {
+  (void) other125;
 }
-Uncompressed& Uncompressed::operator=(const Uncompressed& other122) noexcept {
-  (void) other122;
+Uncompressed& Uncompressed::operator=(const Uncompressed& other126) noexcept {
+  (void) other126;
   return *this;
 }
-Uncompressed& Uncompressed::operator=(Uncompressed&& other123) noexcept {
-  (void) other123;
+Uncompressed& Uncompressed::operator=(Uncompressed&& other127) noexcept {
+  (void) other127;
   return *this;
 }
 void Uncompressed::printTo(std::ostream& out) const {
@@ -4328,22 +4427,22 @@ void swap(BloomFilterCompression &a, BloomFilterCompression &b) {
   swap(a.__isset, b.__isset);
 }
 
-BloomFilterCompression::BloomFilterCompression(const BloomFilterCompression& other124) noexcept {
-  UNCOMPRESSED = other124.UNCOMPRESSED;
-  __isset = other124.__isset;
+BloomFilterCompression::BloomFilterCompression(const BloomFilterCompression& other128) noexcept {
+  UNCOMPRESSED = other128.UNCOMPRESSED;
+  __isset = other128.__isset;
 }
-BloomFilterCompression::BloomFilterCompression(BloomFilterCompression&& other125) noexcept {
-  UNCOMPRESSED = std::move(other125.UNCOMPRESSED);
-  __isset = other125.__isset;
+BloomFilterCompression::BloomFilterCompression(BloomFilterCompression&& other129) noexcept {
+  UNCOMPRESSED = std::move(other129.UNCOMPRESSED);
+  __isset = other129.__isset;
 }
-BloomFilterCompression& BloomFilterCompression::operator=(const BloomFilterCompression& other126) noexcept {
-  UNCOMPRESSED = other126.UNCOMPRESSED;
-  __isset = other126.__isset;
+BloomFilterCompression& BloomFilterCompression::operator=(const BloomFilterCompression& other130) noexcept {
+  UNCOMPRESSED = other130.UNCOMPRESSED;
+  __isset = other130.__isset;
   return *this;
 }
-BloomFilterCompression& BloomFilterCompression::operator=(BloomFilterCompression&& other127) noexcept {
-  UNCOMPRESSED = std::move(other127.UNCOMPRESSED);
-  __isset = other127.__isset;
+BloomFilterCompression& BloomFilterCompression::operator=(BloomFilterCompression&& other131) noexcept {
+  UNCOMPRESSED = std::move(other131.UNCOMPRESSED);
+  __isset = other131.__isset;
   return *this;
 }
 void BloomFilterCompression::printTo(std::ostream& out) const {
@@ -4491,30 +4590,30 @@ void swap(BloomFilterHeader &a, BloomFilterHeader &b) {
   swap(a.compression, b.compression);
 }
 
-BloomFilterHeader::BloomFilterHeader(const BloomFilterHeader& other128) noexcept {
-  numBytes = other128.numBytes;
-  algorithm = other128.algorithm;
-  hash = other128.hash;
-  compression = other128.compression;
+BloomFilterHeader::BloomFilterHeader(const BloomFilterHeader& other132) noexcept {
+  numBytes = other132.numBytes;
+  algorithm = other132.algorithm;
+  hash = other132.hash;
+  compression = other132.compression;
 }
-BloomFilterHeader::BloomFilterHeader(BloomFilterHeader&& other129) noexcept {
-  numBytes = other129.numBytes;
-  algorithm = std::move(other129.algorithm);
-  hash = std::move(other129.hash);
-  compression = std::move(other129.compression);
+BloomFilterHeader::BloomFilterHeader(BloomFilterHeader&& other133) noexcept {
+  numBytes = other133.numBytes;
+  algorithm = std::move(other133.algorithm);
+  hash = std::move(other133.hash);
+  compression = std::move(other133.compression);
 }
-BloomFilterHeader& BloomFilterHeader::operator=(const BloomFilterHeader& other130) noexcept {
-  numBytes = other130.numBytes;
-  algorithm = other130.algorithm;
-  hash = other130.hash;
-  compression = other130.compression;
+BloomFilterHeader& BloomFilterHeader::operator=(const BloomFilterHeader& other134) noexcept {
+  numBytes = other134.numBytes;
+  algorithm = other134.algorithm;
+  hash = other134.hash;
+  compression = other134.compression;
   return *this;
 }
-BloomFilterHeader& BloomFilterHeader::operator=(BloomFilterHeader&& other131) noexcept {
-  numBytes = other131.numBytes;
-  algorithm = std::move(other131.algorithm);
-  hash = std::move(other131.hash);
-  compression = std::move(other131.compression);
+BloomFilterHeader& BloomFilterHeader::operator=(BloomFilterHeader&& other135) noexcept {
+  numBytes = other135.numBytes;
+  algorithm = std::move(other135.algorithm);
+  hash = std::move(other135.hash);
+  compression = std::move(other135.compression);
   return *this;
 }
 void BloomFilterHeader::printTo(std::ostream& out) const {
@@ -4601,9 +4700,9 @@ uint32_t PageHeader::read(::apache::thrift::protocol::TProtocol* iprot) {
     {
       case 1:
         if (ftype == ::apache::thrift::protocol::T_I32) {
-          int32_t ecast132;
-          xfer += iprot->readI32(ecast132);
-          this->type = static_cast<PageType::type>(ecast132);
+          int32_t ecast136;
+          xfer += iprot->readI32(ecast136);
+          this->type = static_cast<PageType::type>(ecast136);
           isset_type = true;
         } else {
           xfer += iprot->skip(ftype);
@@ -4743,50 +4842,50 @@ void swap(PageHeader &a, PageHeader &b) {
   swap(a.__isset, b.__isset);
 }
 
-PageHeader::PageHeader(const PageHeader& other133) {
-  type = other133.type;
-  uncompressed_page_size = other133.uncompressed_page_size;
-  compressed_page_size = other133.compressed_page_size;
-  crc = other133.crc;
-  data_page_header = other133.data_page_header;
-  index_page_header = other133.index_page_header;
-  dictionary_page_header = other133.dictionary_page_header;
-  data_page_header_v2 = other133.data_page_header_v2;
-  __isset = other133.__isset;
-}
-PageHeader::PageHeader(PageHeader&& other134) noexcept {
-  type = other134.type;
-  uncompressed_page_size = other134.uncompressed_page_size;
-  compressed_page_size = other134.compressed_page_size;
-  crc = other134.crc;
-  data_page_header = std::move(other134.data_page_header);
-  index_page_header = std::move(other134.index_page_header);
-  dictionary_page_header = std::move(other134.dictionary_page_header);
-  data_page_header_v2 = std::move(other134.data_page_header_v2);
-  __isset = other134.__isset;
-}
-PageHeader& PageHeader::operator=(const PageHeader& other135) {
-  type = other135.type;
-  uncompressed_page_size = other135.uncompressed_page_size;
-  compressed_page_size = other135.compressed_page_size;
-  crc = other135.crc;
-  data_page_header = other135.data_page_header;
-  index_page_header = other135.index_page_header;
-  dictionary_page_header = other135.dictionary_page_header;
-  data_page_header_v2 = other135.data_page_header_v2;
-  __isset = other135.__isset;
+PageHeader::PageHeader(const PageHeader& other137) {
+  type = other137.type;
+  uncompressed_page_size = other137.uncompressed_page_size;
+  compressed_page_size = other137.compressed_page_size;
+  crc = other137.crc;
+  data_page_header = other137.data_page_header;
+  index_page_header = other137.index_page_header;
+  dictionary_page_header = other137.dictionary_page_header;
+  data_page_header_v2 = other137.data_page_header_v2;
+  __isset = other137.__isset;
+}
+PageHeader::PageHeader(PageHeader&& other138) noexcept {
+  type = other138.type;
+  uncompressed_page_size = other138.uncompressed_page_size;
+  compressed_page_size = other138.compressed_page_size;
+  crc = other138.crc;
+  data_page_header = std::move(other138.data_page_header);
+  index_page_header = std::move(other138.index_page_header);
+  dictionary_page_header = std::move(other138.dictionary_page_header);
+  data_page_header_v2 = std::move(other138.data_page_header_v2);
+  __isset = other138.__isset;
+}
+PageHeader& PageHeader::operator=(const PageHeader& other139) {
+  type = other139.type;
+  uncompressed_page_size = other139.uncompressed_page_size;
+  compressed_page_size = other139.compressed_page_size;
+  crc = other139.crc;
+  data_page_header = other139.data_page_header;
+  index_page_header = other139.index_page_header;
+  dictionary_page_header = other139.dictionary_page_header;
+  data_page_header_v2 = other139.data_page_header_v2;
+  __isset = other139.__isset;
   return *this;
 }
-PageHeader& PageHeader::operator=(PageHeader&& other136) noexcept {
-  type = other136.type;
-  uncompressed_page_size = other136.uncompressed_page_size;
-  compressed_page_size = other136.compressed_page_size;
-  crc = other136.crc;
-  data_page_header = std::move(other136.data_page_header);
-  index_page_header = std::move(other136.index_page_header);
-  dictionary_page_header = std::move(other136.dictionary_page_header);
-  data_page_header_v2 = std::move(other136.data_page_header_v2);
-  __isset = other136.__isset;
+PageHeader& PageHeader::operator=(PageHeader&& other140) noexcept {
+  type = other140.type;
+  uncompressed_page_size = other140.uncompressed_page_size;
+  compressed_page_size = other140.compressed_page_size;
+  crc = other140.crc;
+  data_page_header = std::move(other140.data_page_header);
+  index_page_header = std::move(other140.index_page_header);
+  dictionary_page_header = std::move(other140.dictionary_page_header);
+  data_page_header_v2 = std::move(other140.data_page_header_v2);
+  __isset = other140.__isset;
   return *this;
 }
 void PageHeader::printTo(std::ostream& out) const {
@@ -4901,26 +5000,26 @@ void swap(KeyValue &a, KeyValue &b) {
   swap(a.__isset, b.__isset);
 }
 
-KeyValue::KeyValue(const KeyValue& other137) {
-  key = other137.key;
-  value = other137.value;
-  __isset = other137.__isset;
+KeyValue::KeyValue(const KeyValue& other141) {
+  key = other141.key;
+  value = other141.value;
+  __isset = other141.__isset;
 }
-KeyValue::KeyValue(KeyValue&& other138) noexcept {
-  key = std::move(other138.key);
-  value = std::move(other138.value);
-  __isset = other138.__isset;
+KeyValue::KeyValue(KeyValue&& other142) noexcept {
+  key = std::move(other142.key);
+  value = std::move(other142.value);
+  __isset = other142.__isset;
 }
-KeyValue& KeyValue::operator=(const KeyValue& other139) {
-  key = other139.key;
-  value = other139.value;
-  __isset = other139.__isset;
+KeyValue& KeyValue::operator=(const KeyValue& other143) {
+  key = other143.key;
+  value = other143.value;
+  __isset = other143.__isset;
   return *this;
 }
-KeyValue& KeyValue::operator=(KeyValue&& other140) noexcept {
-  key = std::move(other140.key);
-  value = std::move(other140.value);
-  __isset = other140.__isset;
+KeyValue& KeyValue::operator=(KeyValue&& other144) noexcept {
+  key = std::move(other144.key);
+  value = std::move(other144.value);
+  __isset = other144.__isset;
   return *this;
 }
 void KeyValue::printTo(std::ostream& out) const {
@@ -5049,26 +5148,26 @@ void swap(SortingColumn &a, SortingColumn &b) {
   swap(a.nulls_first, b.nulls_first);
 }
 
-SortingColumn::SortingColumn(const SortingColumn& other141) noexcept {
-  column_idx = other141.column_idx;
-  descending = other141.descending;
-  nulls_first = other141.nulls_first;
+SortingColumn::SortingColumn(const SortingColumn& other145) noexcept {
+  column_idx = other145.column_idx;
+  descending = other145.descending;
+  nulls_first = other145.nulls_first;
 }
-SortingColumn::SortingColumn(SortingColumn&& other142) noexcept {
-  column_idx = other142.column_idx;
-  descending = other142.descending;
-  nulls_first = other142.nulls_first;
+SortingColumn::SortingColumn(SortingColumn&& other146) noexcept {
+  column_idx = other146.column_idx;
+  descending = other146.descending;
+  nulls_first = other146.nulls_first;
 }
-SortingColumn& SortingColumn::operator=(const SortingColumn& other143) noexcept {
-  column_idx = other143.column_idx;
-  descending = other143.descending;
-  nulls_first = other143.nulls_first;
+SortingColumn& SortingColumn::operator=(const SortingColumn& other147) noexcept {
+  column_idx = other147.column_idx;
+  descending = other147.descending;
+  nulls_first = other147.nulls_first;
   return *this;
 }
-SortingColumn& SortingColumn::operator=(SortingColumn&& other144) noexcept {
-  column_idx = other144.column_idx;
-  descending = other144.descending;
-  nulls_first = other144.nulls_first;
+SortingColumn& SortingColumn::operator=(SortingColumn&& other148) noexcept {
+  column_idx = other148.column_idx;
+  descending = other148.descending;
+  nulls_first = other148.nulls_first;
   return *this;
 }
 void SortingColumn::printTo(std::ostream& out) const {
@@ -5129,9 +5228,9 @@ uint32_t PageEncodingStats::read(::apache::thrift::protocol::TProtocol* iprot) {
     {
       case 1:
         if (ftype == ::apache::thrift::protocol::T_I32) {
-          int32_t ecast145;
-          xfer += iprot->readI32(ecast145);
-          this->page_type = static_cast<PageType::type>(ecast145);
+          int32_t ecast149;
+          xfer += iprot->readI32(ecast149);
+          this->page_type = static_cast<PageType::type>(ecast149);
           isset_page_type = true;
         } else {
           xfer += iprot->skip(ftype);
@@ -5139,9 +5238,9 @@ uint32_t PageEncodingStats::read(::apache::thrift::protocol::TProtocol* iprot) {
         break;
       case 2:
         if (ftype == ::apache::thrift::protocol::T_I32) {
-          int32_t ecast146;
-          xfer += iprot->readI32(ecast146);
-          this->encoding = static_cast<Encoding::type>(ecast146);
+          int32_t ecast150;
+          xfer += iprot->readI32(ecast150);
+          this->encoding = static_cast<Encoding::type>(ecast150);
           isset_encoding = true;
         } else {
           xfer += iprot->skip(ftype);
@@ -5202,26 +5301,26 @@ void swap(PageEncodingStats &a, PageEncodingStats &b) {
   swap(a.count, b.count);
 }
 
-PageEncodingStats::PageEncodingStats(const PageEncodingStats& other147) noexcept {
-  page_type = other147.page_type;
-  encoding = other147.encoding;
-  count = other147.count;
+PageEncodingStats::PageEncodingStats(const PageEncodingStats& other151) noexcept {
+  page_type = other151.page_type;
+  encoding = other151.encoding;
+  count = other151.count;
 }
-PageEncodingStats::PageEncodingStats(PageEncodingStats&& other148) noexcept {
-  page_type = other148.page_type;
-  encoding = other148.encoding;
-  count = other148.count;
+PageEncodingStats::PageEncodingStats(PageEncodingStats&& other152) noexcept {
+  page_type = other152.page_type;
+  encoding = other152.encoding;
+  count = other152.count;
 }
-PageEncodingStats& PageEncodingStats::operator=(const PageEncodingStats& other149) noexcept {
-  page_type = other149.page_type;
-  encoding = other149.encoding;
-  count = other149.count;
+PageEncodingStats& PageEncodingStats::operator=(const PageEncodingStats& other153) noexcept {
+  page_type = other153.page_type;
+  encoding = other153.encoding;
+  count = other153.count;
   return *this;
 }
-PageEncodingStats& PageEncodingStats::operator=(PageEncodingStats&& other150) noexcept {
-  page_type = other150.page_type;
-  encoding = other150.encoding;
-  count = other150.count;
+PageEncodingStats& PageEncodingStats::operator=(PageEncodingStats&& other154) noexcept {
+  page_type = other154.page_type;
+  encoding = other154.encoding;
+  count = other154.count;
   return *this;
 }
 void PageEncodingStats::printTo(std::ostream& out) const {
@@ -5337,9 +5436,9 @@ uint32_t ColumnMetaData::read(::apache::thrift::protocol::TProtocol* iprot) {
     {
       case 1:
         if (ftype == ::apache::thrift::protocol::T_I32) {
-          int32_t ecast151;
-          xfer += iprot->readI32(ecast151);
-          this->type = static_cast<Type::type>(ecast151);
+          int32_t ecast155;
+          xfer += iprot->readI32(ecast155);
+          this->type = static_cast<Type::type>(ecast155);
           isset_type = true;
         } else {
           xfer += iprot->skip(ftype);
@@ -5349,16 +5448,16 @@ uint32_t ColumnMetaData::read(::apache::thrift::protocol::TProtocol* iprot) {
         if (ftype == ::apache::thrift::protocol::T_LIST) {
           {
             this->encodings.clear();
-            uint32_t _size152;
-            ::apache::thrift::protocol::TType _etype155;
-            xfer += iprot->readListBegin(_etype155, _size152);
-            this->encodings.resize(_size152);
-            uint32_t _i156;
-            for (_i156 = 0; _i156 < _size152; ++_i156)
+            uint32_t _size156;
+            ::apache::thrift::protocol::TType _etype159;
+            xfer += iprot->readListBegin(_etype159, _size156);
+            this->encodings.resize(_size156);
+            uint32_t _i160;
+            for (_i160 = 0; _i160 < _size156; ++_i160)
             {
-              int32_t ecast157;
-              xfer += iprot->readI32(ecast157);
-              this->encodings[_i156] = static_cast<Encoding::type>(ecast157);
+              int32_t ecast161;
+              xfer += iprot->readI32(ecast161);
+              this->encodings[_i160] = static_cast<Encoding::type>(ecast161);
             }
             xfer += iprot->readListEnd();
           }
@@ -5371,14 +5470,14 @@ uint32_t ColumnMetaData::read(::apache::thrift::protocol::TProtocol* iprot) {
         if (ftype == ::apache::thrift::protocol::T_LIST) {
           {
             this->path_in_schema.clear();
-            uint32_t _size158;
-            ::apache::thrift::protocol::TType _etype161;
-            xfer += iprot->readListBegin(_etype161, _size158);
-            this->path_in_schema.resize(_size158);
-            uint32_t _i162;
-            for (_i162 = 0; _i162 < _size158; ++_i162)
+            uint32_t _size162;
+            ::apache::thrift::protocol::TType _etype165;
+            xfer += iprot->readListBegin(_etype165, _size162);
+            this->path_in_schema.resize(_size162);
+            uint32_t _i166;
+            for (_i166 = 0; _i166 < _size162; ++_i166)
             {
-              xfer += iprot->readString(this->path_in_schema[_i162]);
+              xfer += iprot->readString(this->path_in_schema[_i166]);
             }
             xfer += iprot->readListEnd();
           }
@@ -5389,9 +5488,9 @@ uint32_t ColumnMetaData::read(::apache::thrift::protocol::TProtocol* iprot) {
         break;
       case 4:
         if (ftype == ::apache::thrift::protocol::T_I32) {
-          int32_t ecast163;
-          xfer += iprot->readI32(ecast163);
-          this->codec = static_cast<CompressionCodec::type>(ecast163);
+          int32_t ecast167;
+          xfer += iprot->readI32(ecast167);
+          this->codec = static_cast<CompressionCodec::type>(ecast167);
           isset_codec = true;
         } else {
           xfer += iprot->skip(ftype);
@@ -5425,14 +5524,14 @@ uint32_t ColumnMetaData::read(::apache::thrift::protocol::TProtocol* iprot) {
         if (ftype == ::apache::thrift::protocol::T_LIST) {
           {
             this->key_value_metadata.clear();
-            uint32_t _size164;
-            ::apache::thrift::protocol::TType _etype167;
-            xfer += iprot->readListBegin(_etype167, _size164);
-            this->key_value_metadata.resize(_size164);
-            uint32_t _i168;
-            for (_i168 = 0; _i168 < _size164; ++_i168)
+            uint32_t _size168;
+            ::apache::thrift::protocol::TType _etype171;
+            xfer += iprot->readListBegin(_etype171, _size168);
+            this->key_value_metadata.resize(_size168);
+            uint32_t _i172;
+            for (_i172 = 0; _i172 < _size168; ++_i172)
             {
-              xfer += this->key_value_metadata[_i168].read(iprot);
+              xfer += this->key_value_metadata[_i172].read(iprot);
             }
             xfer += iprot->readListEnd();
           }
@@ -5477,14 +5576,14 @@ uint32_t ColumnMetaData::read(::apache::thrift::protocol::TProtocol* iprot) {
         if (ftype == ::apache::thrift::protocol::T_LIST) {
           {
             this->encoding_stats.clear();
-            uint32_t _size169;
-            ::apache::thrift::protocol::TType _etype172;
-            xfer += iprot->readListBegin(_etype172, _size169);
-            this->encoding_stats.resize(_size169);
-            uint32_t _i173;
-            for (_i173 = 0; _i173 < _size169; ++_i173)
+            uint32_t _size173;
+            ::apache::thrift::protocol::TType _etype176;
+            xfer += iprot->readListBegin(_etype176, _size173);
+            this->encoding_stats.resize(_size173);
+            uint32_t _i177;
+            for (_i177 = 0; _i177 < _size173; ++_i177)
             {
-              xfer += this->encoding_stats[_i173].read(iprot);
+              xfer += this->encoding_stats[_i177].read(iprot);
             }
             xfer += iprot->readListEnd();
           }
@@ -5541,10 +5640,10 @@ uint32_t ColumnMetaData::write(::apache::thrift::protocol::TProtocol* oprot) con
   xfer += oprot->writeFieldBegin("encodings", ::apache::thrift::protocol::T_LIST, 2);
   {
     xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I32, static_cast<uint32_t>(this->encodings.size()));
-    std::vector<Encoding::type> ::const_iterator _iter174;
-    for (_iter174 = this->encodings.begin(); _iter174 != this->encodings.end(); ++_iter174)
+    std::vector<Encoding::type> ::const_iterator _iter178;
+    for (_iter178 = this->encodings.begin(); _iter178 != this->encodings.end(); ++_iter178)
     {
-      xfer += oprot->writeI32(static_cast<int32_t>((*_iter174)));
+      xfer += oprot->writeI32(static_cast<int32_t>((*_iter178)));
     }
     xfer += oprot->writeListEnd();
   }
@@ -5553,10 +5652,10 @@ uint32_t ColumnMetaData::write(::apache::thrift::protocol::TProtocol* oprot) con
   xfer += oprot->writeFieldBegin("path_in_schema", ::apache::thrift::protocol::T_LIST, 3);
   {
     xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast<uint32_t>(this->path_in_schema.size()));
-    std::vector<std::string> ::const_iterator _iter175;
-    for (_iter175 = this->path_in_schema.begin(); _iter175 != this->path_in_schema.end(); ++_iter175)
+    std::vector<std::string> ::const_iterator _iter179;
+    for (_iter179 = this->path_in_schema.begin(); _iter179 != this->path_in_schema.end(); ++_iter179)
     {
-      xfer += oprot->writeString((*_iter175));
+      xfer += oprot->writeString((*_iter179));
     }
     xfer += oprot->writeListEnd();
   }
@@ -5582,10 +5681,10 @@ uint32_t ColumnMetaData::write(::apache::thrift::protocol::TProtocol* oprot) con
     xfer += oprot->writeFieldBegin("key_value_metadata", ::apache::thrift::protocol::T_LIST, 8);
     {
       xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->key_value_metadata.size()));
-      std::vector<KeyValue> ::const_iterator _iter176;
-      for (_iter176 = this->key_value_metadata.begin(); _iter176 != this->key_value_metadata.end(); ++_iter176)
+      std::vector<KeyValue> ::const_iterator _iter180;
+      for (_iter180 = this->key_value_metadata.begin(); _iter180 != this->key_value_metadata.end(); ++_iter180)
       {
-        xfer += (*_iter176).write(oprot);
+        xfer += (*_iter180).write(oprot);
       }
       xfer += oprot->writeListEnd();
     }
@@ -5614,10 +5713,10 @@ uint32_t ColumnMetaData::write(::apache::thrift::protocol::TProtocol* oprot) con
     xfer += oprot->writeFieldBegin("encoding_stats", ::apache::thrift::protocol::T_LIST, 13);
     {
       xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->encoding_stats.size()));
-      std::vector<PageEncodingStats> ::const_iterator _iter177;
-      for (_iter177 = this->encoding_stats.begin(); _iter177 != this->encoding_stats.end(); ++_iter177)
+      std::vector<PageEncodingStats> ::const_iterator _iter181;
+      for (_iter181 = this->encoding_stats.begin(); _iter181 != this->encoding_stats.end(); ++_iter181)
       {
-        xfer += (*_iter177).write(oprot);
+        xfer += (*_iter181).write(oprot);
       }
       xfer += oprot->writeListEnd();
     }
@@ -5652,74 +5751,74 @@ void swap(ColumnMetaData &a, ColumnMetaData &b) {
   swap(a.__isset, b.__isset);
 }
 
-ColumnMetaData::ColumnMetaData(const ColumnMetaData& other178) {
-  type = other178.type;
-  encodings = other178.encodings;
-  path_in_schema = other178.path_in_schema;
-  codec = other178.codec;
-  num_values = other178.num_values;
-  total_uncompressed_size = other178.total_uncompressed_size;
-  total_compressed_size = other178.total_compressed_size;
-  key_value_metadata = other178.key_value_metadata;
-  data_page_offset = other178.data_page_offset;
-  index_page_offset = other178.index_page_offset;
-  dictionary_page_offset = other178.dictionary_page_offset;
-  statistics = other178.statistics;
-  encoding_stats = other178.encoding_stats;
-  bloom_filter_offset = other178.bloom_filter_offset;
-  __isset = other178.__isset;
-}
-ColumnMetaData::ColumnMetaData(ColumnMetaData&& other179) noexcept {
-  type = other179.type;
-  encodings = std::move(other179.encodings);
-  path_in_schema = std::move(other179.path_in_schema);
-  codec = other179.codec;
-  num_values = other179.num_values;
-  total_uncompressed_size = other179.total_uncompressed_size;
-  total_compressed_size = other179.total_compressed_size;
-  key_value_metadata = std::move(other179.key_value_metadata);
-  data_page_offset = other179.data_page_offset;
-  index_page_offset = other179.index_page_offset;
-  dictionary_page_offset = other179.dictionary_page_offset;
-  statistics = std::move(other179.statistics);
-  encoding_stats = std::move(other179.encoding_stats);
-  bloom_filter_offset = other179.bloom_filter_offset;
-  __isset = other179.__isset;
-}
-ColumnMetaData& ColumnMetaData::operator=(const ColumnMetaData& other180) {
-  type = other180.type;
-  encodings = other180.encodings;
-  path_in_schema = other180.path_in_schema;
-  codec = other180.codec;
-  num_values = other180.num_values;
-  total_uncompressed_size = other180.total_uncompressed_size;
-  total_compressed_size = other180.total_compressed_size;
-  key_value_metadata = other180.key_value_metadata;
-  data_page_offset = other180.data_page_offset;
-  index_page_offset = other180.index_page_offset;
-  dictionary_page_offset = other180.dictionary_page_offset;
-  statistics = other180.statistics;
-  encoding_stats = other180.encoding_stats;
-  bloom_filter_offset = other180.bloom_filter_offset;
-  __isset = other180.__isset;
+ColumnMetaData::ColumnMetaData(const ColumnMetaData& other182) {
+  type = other182.type;
+  encodings = other182.encodings;
+  path_in_schema = other182.path_in_schema;
+  codec = other182.codec;
+  num_values = other182.num_values;
+  total_uncompressed_size = other182.total_uncompressed_size;
+  total_compressed_size = other182.total_compressed_size;
+  key_value_metadata = other182.key_value_metadata;
+  data_page_offset = other182.data_page_offset;
+  index_page_offset = other182.index_page_offset;
+  dictionary_page_offset = other182.dictionary_page_offset;
+  statistics = other182.statistics;
+  encoding_stats = other182.encoding_stats;
+  bloom_filter_offset = other182.bloom_filter_offset;
+  __isset = other182.__isset;
+}
+ColumnMetaData::ColumnMetaData(ColumnMetaData&& other183) noexcept {
+  type = other183.type;
+  encodings = std::move(other183.encodings);
+  path_in_schema = std::move(other183.path_in_schema);
+  codec = other183.codec;
+  num_values = other183.num_values;
+  total_uncompressed_size = other183.total_uncompressed_size;
+  total_compressed_size = other183.total_compressed_size;
+  key_value_metadata = std::move(other183.key_value_metadata);
+  data_page_offset = other183.data_page_offset;
+  index_page_offset = other183.index_page_offset;
+  dictionary_page_offset = other183.dictionary_page_offset;
+  statistics = std::move(other183.statistics);
+  encoding_stats = std::move(other183.encoding_stats);
+  bloom_filter_offset = other183.bloom_filter_offset;
+  __isset = other183.__isset;
+}
+ColumnMetaData& ColumnMetaData::operator=(const ColumnMetaData& other184) {
+  type = other184.type;
+  encodings = other184.encodings;
+  path_in_schema = other184.path_in_schema;
+  codec = other184.codec;
+  num_values = other184.num_values;
+  total_uncompressed_size = other184.total_uncompressed_size;
+  total_compressed_size = other184.total_compressed_size;
+  key_value_metadata = other184.key_value_metadata;
+  data_page_offset = other184.data_page_offset;
+  index_page_offset = other184.index_page_offset;
+  dictionary_page_offset = other184.dictionary_page_offset;
+  statistics = other184.statistics;
+  encoding_stats = other184.encoding_stats;
+  bloom_filter_offset = other184.bloom_filter_offset;
+  __isset = other184.__isset;
   return *this;
 }
-ColumnMetaData& ColumnMetaData::operator=(ColumnMetaData&& other181) noexcept {
-  type = other181.type;
-  encodings = std::move(other181.encodings);
-  path_in_schema = std::move(other181.path_in_schema);
-  codec = other181.codec;
-  num_values = other181.num_values;
-  total_uncompressed_size = other181.total_uncompressed_size;
-  total_compressed_size = other181.total_compressed_size;
-  key_value_metadata = std::move(other181.key_value_metadata);
-  data_page_offset = other181.data_page_offset;
-  index_page_offset = other181.index_page_offset;
-  dictionary_page_offset = other181.dictionary_page_offset;
-  statistics = std::move(other181.statistics);
-  encoding_stats = std::move(other181.encoding_stats);
-  bloom_filter_offset = other181.bloom_filter_offset;
-  __isset = other181.__isset;
+ColumnMetaData& ColumnMetaData::operator=(ColumnMetaData&& other185) noexcept {
+  type = other185.type;
+  encodings = std::move(other185.encodings);
+  path_in_schema = std::move(other185.path_in_schema);
+  codec = other185.codec;
+  num_values = other185.num_values;
+  total_uncompressed_size = other185.total_uncompressed_size;
+  total_compressed_size = other185.total_compressed_size;
+  key_value_metadata = std::move(other185.key_value_metadata);
+  data_page_offset = other185.data_page_offset;
+  index_page_offset = other185.index_page_offset;
+  dictionary_page_offset = other185.dictionary_page_offset;
+  statistics = std::move(other185.statistics);
+  encoding_stats = std::move(other185.encoding_stats);
+  bloom_filter_offset = other185.bloom_filter_offset;
+  __isset = other185.__isset;
   return *this;
 }
 void ColumnMetaData::printTo(std::ostream& out) const {
@@ -5797,18 +5896,18 @@ void swap(EncryptionWithFooterKey &a, EncryptionWithFooterKey &b) {
   (void) b;
 }
 
-EncryptionWithFooterKey::EncryptionWithFooterKey(const EncryptionWithFooterKey& other182) noexcept {
-  (void) other182;
+EncryptionWithFooterKey::EncryptionWithFooterKey(const EncryptionWithFooterKey& other186) noexcept {
+  (void) other186;
 }
-EncryptionWithFooterKey::EncryptionWithFooterKey(EncryptionWithFooterKey&& other183) noexcept {
-  (void) other183;
+EncryptionWithFooterKey::EncryptionWithFooterKey(EncryptionWithFooterKey&& other187) noexcept {
+  (void) other187;
 }
-EncryptionWithFooterKey& EncryptionWithFooterKey::operator=(const EncryptionWithFooterKey& other184) noexcept {
-  (void) other184;
+EncryptionWithFooterKey& EncryptionWithFooterKey::operator=(const EncryptionWithFooterKey& other188) noexcept {
+  (void) other188;
   return *this;
 }
-EncryptionWithFooterKey& EncryptionWithFooterKey::operator=(EncryptionWithFooterKey&& other185) noexcept {
-  (void) other185;
+EncryptionWithFooterKey& EncryptionWithFooterKey::operator=(EncryptionWithFooterKey&& other189) noexcept {
+  (void) other189;
   return *this;
 }
 void EncryptionWithFooterKey::printTo(std::ostream& out) const {
@@ -5863,14 +5962,14 @@ uint32_t EncryptionWithColumnKey::read(::apache::thrift::protocol::TProtocol* ip
         if (ftype == ::apache::thrift::protocol::T_LIST) {
           {
             this->path_in_schema.clear();
-            uint32_t _size186;
-            ::apache::thrift::protocol::TType _etype189;
-            xfer += iprot->readListBegin(_etype189, _size186);
-            this->path_in_schema.resize(_size186);
-            uint32_t _i190;
-            for (_i190 = 0; _i190 < _size186; ++_i190)
+            uint32_t _size190;
+            ::apache::thrift::protocol::TType _etype193;
+            xfer += iprot->readListBegin(_etype193, _size190);
+            this->path_in_schema.resize(_size190);
+            uint32_t _i194;
+            for (_i194 = 0; _i194 < _size190; ++_i194)
             {
-              xfer += iprot->readString(this->path_in_schema[_i190]);
+              xfer += iprot->readString(this->path_in_schema[_i194]);
             }
             xfer += iprot->readListEnd();
           }
@@ -5909,10 +6008,10 @@ uint32_t EncryptionWithColumnKey::write(::apache::thrift::protocol::TProtocol* o
   xfer += oprot->writeFieldBegin("path_in_schema", ::apache::thrift::protocol::T_LIST, 1);
   {
     xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast<uint32_t>(this->path_in_schema.size()));
-    std::vector<std::string> ::const_iterator _iter191;
-    for (_iter191 = this->path_in_schema.begin(); _iter191 != this->path_in_schema.end(); ++_iter191)
+    std::vector<std::string> ::const_iterator _iter195;
+    for (_iter195 = this->path_in_schema.begin(); _iter195 != this->path_in_schema.end(); ++_iter195)
     {
-      xfer += oprot->writeString((*_iter191));
+      xfer += oprot->writeString((*_iter195));
     }
     xfer += oprot->writeListEnd();
   }
@@ -5935,26 +6034,26 @@ void swap(EncryptionWithColumnKey &a, EncryptionWithColumnKey &b) {
   swap(a.__isset, b.__isset);
 }
 
-EncryptionWithColumnKey::EncryptionWithColumnKey(const EncryptionWithColumnKey& other192) {
-  path_in_schema = other192.path_in_schema;
-  key_metadata = other192.key_metadata;
-  __isset = other192.__isset;
+EncryptionWithColumnKey::EncryptionWithColumnKey(const EncryptionWithColumnKey& other196) {
+  path_in_schema = other196.path_in_schema;
+  key_metadata = other196.key_metadata;
+  __isset = other196.__isset;
 }
-EncryptionWithColumnKey::EncryptionWithColumnKey(EncryptionWithColumnKey&& other193) noexcept {
-  path_in_schema = std::move(other193.path_in_schema);
-  key_metadata = std::move(other193.key_metadata);
-  __isset = other193.__isset;
+EncryptionWithColumnKey::EncryptionWithColumnKey(EncryptionWithColumnKey&& other197) noexcept {
+  path_in_schema = std::move(other197.path_in_schema);
+  key_metadata = std::move(other197.key_metadata);
+  __isset = other197.__isset;
 }
-EncryptionWithColumnKey& EncryptionWithColumnKey::operator=(const EncryptionWithColumnKey& other194) {
-  path_in_schema = other194.path_in_schema;
-  key_metadata = other194.key_metadata;
-  __isset = other194.__isset;
+EncryptionWithColumnKey& EncryptionWithColumnKey::operator=(const EncryptionWithColumnKey& other198) {
+  path_in_schema = other198.path_in_schema;
+  key_metadata = other198.key_metadata;
+  __isset = other198.__isset;
   return *this;
 }
-EncryptionWithColumnKey& EncryptionWithColumnKey::operator=(EncryptionWithColumnKey&& other195) noexcept {
-  path_in_schema = std::move(other195.path_in_schema);
-  key_metadata = std::move(other195.key_metadata);
-  __isset = other195.__isset;
+EncryptionWithColumnKey& EncryptionWithColumnKey::operator=(EncryptionWithColumnKey&& other199) noexcept {
+  path_in_schema = std::move(other199.path_in_schema);
+  key_metadata = std::move(other199.key_metadata);
+  __isset = other199.__isset;
   return *this;
 }
 void EncryptionWithColumnKey::printTo(std::ostream& out) const {
@@ -6062,26 +6161,26 @@ void swap(ColumnCryptoMetaData &a, ColumnCryptoMetaData &b) {
   swap(a.__isset, b.__isset);
 }
 
-ColumnCryptoMetaData::ColumnCryptoMetaData(const ColumnCryptoMetaData& other196) {
-  ENCRYPTION_WITH_FOOTER_KEY = other196.ENCRYPTION_WITH_FOOTER_KEY;
-  ENCRYPTION_WITH_COLUMN_KEY = other196.ENCRYPTION_WITH_COLUMN_KEY;
-  __isset = other196.__isset;
+ColumnCryptoMetaData::ColumnCryptoMetaData(const ColumnCryptoMetaData& other200) {
+  ENCRYPTION_WITH_FOOTER_KEY = other200.ENCRYPTION_WITH_FOOTER_KEY;
+  ENCRYPTION_WITH_COLUMN_KEY = other200.ENCRYPTION_WITH_COLUMN_KEY;
+  __isset = other200.__isset;
 }
-ColumnCryptoMetaData::ColumnCryptoMetaData(ColumnCryptoMetaData&& other197) noexcept {
-  ENCRYPTION_WITH_FOOTER_KEY = std::move(other197.ENCRYPTION_WITH_FOOTER_KEY);
-  ENCRYPTION_WITH_COLUMN_KEY = std::move(other197.ENCRYPTION_WITH_COLUMN_KEY);
-  __isset = other197.__isset;
+ColumnCryptoMetaData::ColumnCryptoMetaData(ColumnCryptoMetaData&& other201) noexcept {
+  ENCRYPTION_WITH_FOOTER_KEY = std::move(other201.ENCRYPTION_WITH_FOOTER_KEY);
+  ENCRYPTION_WITH_COLUMN_KEY = std::move(other201.ENCRYPTION_WITH_COLUMN_KEY);
+  __isset = other201.__isset;
 }
-ColumnCryptoMetaData& ColumnCryptoMetaData::operator=(const ColumnCryptoMetaData& other198) {
-  ENCRYPTION_WITH_FOOTER_KEY = other198.ENCRYPTION_WITH_FOOTER_KEY;
-  ENCRYPTION_WITH_COLUMN_KEY = other198.ENCRYPTION_WITH_COLUMN_KEY;
-  __isset = other198.__isset;
+ColumnCryptoMetaData& ColumnCryptoMetaData::operator=(const ColumnCryptoMetaData& other202) {
+  ENCRYPTION_WITH_FOOTER_KEY = other202.ENCRYPTION_WITH_FOOTER_KEY;
+  ENCRYPTION_WITH_COLUMN_KEY = other202.ENCRYPTION_WITH_COLUMN_KEY;
+  __isset = other202.__isset;
   return *this;
 }
-ColumnCryptoMetaData& ColumnCryptoMetaData::operator=(ColumnCryptoMetaData&& other199) noexcept {
-  ENCRYPTION_WITH_FOOTER_KEY = std::move(other199.ENCRYPTION_WITH_FOOTER_KEY);
-  ENCRYPTION_WITH_COLUMN_KEY = std::move(other199.ENCRYPTION_WITH_COLUMN_KEY);
-  __isset = other199.__isset;
+ColumnCryptoMetaData& ColumnCryptoMetaData::operator=(ColumnCryptoMetaData&& other203) noexcept {
+  ENCRYPTION_WITH_FOOTER_KEY = std::move(other203.ENCRYPTION_WITH_FOOTER_KEY);
+  ENCRYPTION_WITH_COLUMN_KEY = std::move(other203.ENCRYPTION_WITH_COLUMN_KEY);
+  __isset = other203.__isset;
   return *this;
 }
 void ColumnCryptoMetaData::printTo(std::ostream& out) const {
@@ -6323,54 +6422,54 @@ void swap(ColumnChunk &a, ColumnChunk &b) {
   swap(a.__isset, b.__isset);
 }
 
-ColumnChunk::ColumnChunk(const ColumnChunk& other200) {
-  file_path = other200.file_path;
-  file_offset = other200.file_offset;
-  meta_data = other200.meta_data;
-  offset_index_offset = other200.offset_index_offset;
-  offset_index_length = other200.offset_index_length;
-  column_index_offset = other200.column_index_offset;
-  column_index_length = other200.column_index_length;
-  crypto_metadata = other200.crypto_metadata;
-  encrypted_column_metadata = other200.encrypted_column_metadata;
-  __isset = other200.__isset;
-}
-ColumnChunk::ColumnChunk(ColumnChunk&& other201) noexcept {
-  file_path = std::move(other201.file_path);
-  file_offset = other201.file_offset;
-  meta_data = std::move(other201.meta_data);
-  offset_index_offset = other201.offset_index_offset;
-  offset_index_length = other201.offset_index_length;
-  column_index_offset = other201.column_index_offset;
-  column_index_length = other201.column_index_length;
-  crypto_metadata = std::move(other201.crypto_metadata);
-  encrypted_column_metadata = std::move(other201.encrypted_column_metadata);
-  __isset = other201.__isset;
-}
-ColumnChunk& ColumnChunk::operator=(const ColumnChunk& other202) {
-  file_path = other202.file_path;
-  file_offset = other202.file_offset;
-  meta_data = other202.meta_data;
-  offset_index_offset = other202.offset_index_offset;
-  offset_index_length = other202.offset_index_length;
-  column_index_offset = other202.column_index_offset;
-  column_index_length = other202.column_index_length;
-  crypto_metadata = other202.crypto_metadata;
-  encrypted_column_metadata = other202.encrypted_column_metadata;
-  __isset = other202.__isset;
+ColumnChunk::ColumnChunk(const ColumnChunk& other204) {
+  file_path = other204.file_path;
+  file_offset = other204.file_offset;
+  meta_data = other204.meta_data;
+  offset_index_offset = other204.offset_index_offset;
+  offset_index_length = other204.offset_index_length;
+  column_index_offset = other204.column_index_offset;
+  column_index_length = other204.column_index_length;
+  crypto_metadata = other204.crypto_metadata;
+  encrypted_column_metadata = other204.encrypted_column_metadata;
+  __isset = other204.__isset;
+}
+ColumnChunk::ColumnChunk(ColumnChunk&& other205) noexcept {
+  file_path = std::move(other205.file_path);
+  file_offset = other205.file_offset;
+  meta_data = std::move(other205.meta_data);
+  offset_index_offset = other205.offset_index_offset;
+  offset_index_length = other205.offset_index_length;
+  column_index_offset = other205.column_index_offset;
+  column_index_length = other205.column_index_length;
+  crypto_metadata = std::move(other205.crypto_metadata);
+  encrypted_column_metadata = std::move(other205.encrypted_column_metadata);
+  __isset = other205.__isset;
+}
+ColumnChunk& ColumnChunk::operator=(const ColumnChunk& other206) {
+  file_path = other206.file_path;
+  file_offset = other206.file_offset;
+  meta_data = other206.meta_data;
+  offset_index_offset = other206.offset_index_offset;
+  offset_index_length = other206.offset_index_length;
+  column_index_offset = other206.column_index_offset;
+  column_index_length = other206.column_index_length;
+  crypto_metadata = other206.crypto_metadata;
+  encrypted_column_metadata = other206.encrypted_column_metadata;
+  __isset = other206.__isset;
   return *this;
 }
-ColumnChunk& ColumnChunk::operator=(ColumnChunk&& other203) noexcept {
-  file_path = std::move(other203.file_path);
-  file_offset = other203.file_offset;
-  meta_data = std::move(other203.meta_data);
-  offset_index_offset = other203.offset_index_offset;
-  offset_index_length = other203.offset_index_length;
-  column_index_offset = other203.column_index_offset;
-  column_index_length = other203.column_index_length;
-  crypto_metadata = std::move(other203.crypto_metadata);
-  encrypted_column_metadata = std::move(other203.encrypted_column_metadata);
-  __isset = other203.__isset;
+ColumnChunk& ColumnChunk::operator=(ColumnChunk&& other207) noexcept {
+  file_path = std::move(other207.file_path);
+  file_offset = other207.file_offset;
+  meta_data = std::move(other207.meta_data);
+  offset_index_offset = other207.offset_index_offset;
+  offset_index_length = other207.offset_index_length;
+  column_index_offset = other207.column_index_offset;
+  column_index_length = other207.column_index_length;
+  crypto_metadata = std::move(other207.crypto_metadata);
+  encrypted_column_metadata = std::move(other207.encrypted_column_metadata);
+  __isset = other207.__isset;
   return *this;
 }
 void ColumnChunk::printTo(std::ostream& out) const {
@@ -6459,14 +6558,14 @@ uint32_t RowGroup::read(::apache::thrift::protocol::TProtocol* iprot) {
         if (ftype == ::apache::thrift::protocol::T_LIST) {
           {
             this->columns.clear();
-            uint32_t _size204;
-            ::apache::thrift::protocol::TType _etype207;
-            xfer += iprot->readListBegin(_etype207, _size204);
-            this->columns.resize(_size204);
-            uint32_t _i208;
-            for (_i208 = 0; _i208 < _size204; ++_i208)
+            uint32_t _size208;
+            ::apache::thrift::protocol::TType _etype211;
+            xfer += iprot->readListBegin(_etype211, _size208);
+            this->columns.resize(_size208);
+            uint32_t _i212;
+            for (_i212 = 0; _i212 < _size208; ++_i212)
             {
-              xfer += this->columns[_i208].read(iprot);
+              xfer += this->columns[_i212].read(iprot);
             }
             xfer += iprot->readListEnd();
           }
@@ -6495,14 +6594,14 @@ uint32_t RowGroup::read(::apache::thrift::protocol::TProtocol* iprot) {
         if (ftype == ::apache::thrift::protocol::T_LIST) {
           {
             this->sorting_columns.clear();
-            uint32_t _size209;
-            ::apache::thrift::protocol::TType _etype212;
-            xfer += iprot->readListBegin(_etype212, _size209);
-            this->sorting_columns.resize(_size209);
-            uint32_t _i213;
-            for (_i213 = 0; _i213 < _size209; ++_i213)
+            uint32_t _size213;
+            ::apache::thrift::protocol::TType _etype216;
+            xfer += iprot->readListBegin(_etype216, _size213);
+            this->sorting_columns.resize(_size213);
+            uint32_t _i217;
+            for (_i217 = 0; _i217 < _size213; ++_i217)
             {
-              xfer += this->sorting_columns[_i213].read(iprot);
+              xfer += this->sorting_columns[_i217].read(iprot);
             }
             xfer += iprot->readListEnd();
           }
@@ -6561,10 +6660,10 @@ uint32_t RowGroup::write(::apache::thrift::protocol::TProtocol* oprot) const {
   xfer += oprot->writeFieldBegin("columns", ::apache::thrift::protocol::T_LIST, 1);
   {
     xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->columns.size()));
-    std::vector<ColumnChunk> ::const_iterator _iter214;
-    for (_iter214 = this->columns.begin(); _iter214 != this->columns.end(); ++_iter214)
+    std::vector<ColumnChunk> ::const_iterator _iter218;
+    for (_iter218 = this->columns.begin(); _iter218 != this->columns.end(); ++_iter218)
     {
-      xfer += (*_iter214).write(oprot);
+      xfer += (*_iter218).write(oprot);
     }
     xfer += oprot->writeListEnd();
   }
@@ -6582,10 +6681,10 @@ uint32_t RowGroup::write(::apache::thrift::protocol::TProtocol* oprot) const {
     xfer += oprot->writeFieldBegin("sorting_columns", ::apache::thrift::protocol::T_LIST, 4);
     {
       xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->sorting_columns.size()));
-      std::vector<SortingColumn> ::const_iterator _iter215;
-      for (_iter215 = this->sorting_columns.begin(); _iter215 != this->sorting_columns.end(); ++_iter215)
+      std::vector<SortingColumn> ::const_iterator _iter219;
+      for (_iter219 = this->sorting_columns.begin(); _iter219 != this->sorting_columns.end(); ++_iter219)
       {
-        xfer += (*_iter215).write(oprot);
+        xfer += (*_iter219).write(oprot);
       }
       xfer += oprot->writeListEnd();
     }
@@ -6623,46 +6722,46 @@ void swap(RowGroup &a, RowGroup &b) {
   swap(a.__isset, b.__isset);
 }
 
-RowGroup::RowGroup(const RowGroup& other216) {
-  columns = other216.columns;
-  total_byte_size = other216.total_byte_size;
-  num_rows = other216.num_rows;
-  sorting_columns = other216.sorting_columns;
-  file_offset = other216.file_offset;
-  total_compressed_size = other216.total_compressed_size;
-  ordinal = other216.ordinal;
-  __isset = other216.__isset;
-}
-RowGroup::RowGroup(RowGroup&& other217) noexcept {
-  columns = std::move(other217.columns);
-  total_byte_size = other217.total_byte_size;
-  num_rows = other217.num_rows;
-  sorting_columns = std::move(other217.sorting_columns);
-  file_offset = other217.file_offset;
-  total_compressed_size = other217.total_compressed_size;
-  ordinal = other217.ordinal;
-  __isset = other217.__isset;
-}
-RowGroup& RowGroup::operator=(const RowGroup& other218) {
-  columns = other218.columns;
-  total_byte_size = other218.total_byte_size;
-  num_rows = other218.num_rows;
-  sorting_columns = other218.sorting_columns;
-  file_offset = other218.file_offset;
-  total_compressed_size = other218.total_compressed_size;
-  ordinal = other218.ordinal;
-  __isset = other218.__isset;
+RowGroup::RowGroup(const RowGroup& other220) {
+  columns = other220.columns;
+  total_byte_size = other220.total_byte_size;
+  num_rows = other220.num_rows;
+  sorting_columns = other220.sorting_columns;
+  file_offset = other220.file_offset;
+  total_compressed_size = other220.total_compressed_size;
+  ordinal = other220.ordinal;
+  __isset = other220.__isset;
+}
+RowGroup::RowGroup(RowGroup&& other221) noexcept {
+  columns = std::move(other221.columns);
+  total_byte_size = other221.total_byte_size;
+  num_rows = other221.num_rows;
+  sorting_columns = std::move(other221.sorting_columns);
+  file_offset = other221.file_offset;
+  total_compressed_size = other221.total_compressed_size;
+  ordinal = other221.ordinal;
+  __isset = other221.__isset;
+}
+RowGroup& RowGroup::operator=(const RowGroup& other222) {
+  columns = other222.columns;
+  total_byte_size = other222.total_byte_size;
+  num_rows = other222.num_rows;
+  sorting_columns = other222.sorting_columns;
+  file_offset = other222.file_offset;
+  total_compressed_size = other222.total_compressed_size;
+  ordinal = other222.ordinal;
+  __isset = other222.__isset;
   return *this;
 }
-RowGroup& RowGroup::operator=(RowGroup&& other219) noexcept {
-  columns = std::move(other219.columns);
-  total_byte_size = other219.total_byte_size;
-  num_rows = other219.num_rows;
-  sorting_columns = std::move(other219.sorting_columns);
-  file_offset = other219.file_offset;
-  total_compressed_size = other219.total_compressed_size;
-  ordinal = other219.ordinal;
-  __isset = other219.__isset;
+RowGroup& RowGroup::operator=(RowGroup&& other223) noexcept {
+  columns = std::move(other223.columns);
+  total_byte_size = other223.total_byte_size;
+  num_rows = other223.num_rows;
+  sorting_columns = std::move(other223.sorting_columns);
+  file_offset = other223.file_offset;
+  total_compressed_size = other223.total_compressed_size;
+  ordinal = other223.ordinal;
+  __isset = other223.__isset;
   return *this;
 }
 void RowGroup::printTo(std::ostream& out) const {
@@ -6733,18 +6832,18 @@ void swap(TypeDefinedOrder &a, TypeDefinedOrder &b) {
   (void) b;
 }
 
-TypeDefinedOrder::TypeDefinedOrder(const TypeDefinedOrder& other220) noexcept {
-  (void) other220;
+TypeDefinedOrder::TypeDefinedOrder(const TypeDefinedOrder& other224) noexcept {
+  (void) other224;
 }
-TypeDefinedOrder::TypeDefinedOrder(TypeDefinedOrder&& other221) noexcept {
-  (void) other221;
+TypeDefinedOrder::TypeDefinedOrder(TypeDefinedOrder&& other225) noexcept {
+  (void) other225;
 }
-TypeDefinedOrder& TypeDefinedOrder::operator=(const TypeDefinedOrder& other222) noexcept {
-  (void) other222;
+TypeDefinedOrder& TypeDefinedOrder::operator=(const TypeDefinedOrder& other226) noexcept {
+  (void) other226;
   return *this;
 }
-TypeDefinedOrder& TypeDefinedOrder::operator=(TypeDefinedOrder&& other223) noexcept {
-  (void) other223;
+TypeDefinedOrder& TypeDefinedOrder::operator=(TypeDefinedOrder&& other227) noexcept {
+  (void) other227;
   return *this;
 }
 void TypeDefinedOrder::printTo(std::ostream& out) const {
@@ -6831,22 +6930,22 @@ void swap(ColumnOrder &a, ColumnOrder &b) {
   swap(a.__isset, b.__isset);
 }
 
-ColumnOrder::ColumnOrder(const ColumnOrder& other224) noexcept {
-  TYPE_ORDER = other224.TYPE_ORDER;
-  __isset = other224.__isset;
+ColumnOrder::ColumnOrder(const ColumnOrder& other228) noexcept {
+  TYPE_ORDER = other228.TYPE_ORDER;
+  __isset = other228.__isset;
 }
-ColumnOrder::ColumnOrder(ColumnOrder&& other225) noexcept {
-  TYPE_ORDER = std::move(other225.TYPE_ORDER);
-  __isset = other225.__isset;
+ColumnOrder::ColumnOrder(ColumnOrder&& other229) noexcept {
+  TYPE_ORDER = std::move(other229.TYPE_ORDER);
+  __isset = other229.__isset;
 }
-ColumnOrder& ColumnOrder::operator=(const ColumnOrder& other226) noexcept {
-  TYPE_ORDER = other226.TYPE_ORDER;
-  __isset = other226.__isset;
+ColumnOrder& ColumnOrder::operator=(const ColumnOrder& other230) noexcept {
+  TYPE_ORDER = other230.TYPE_ORDER;
+  __isset = other230.__isset;
   return *this;
 }
-ColumnOrder& ColumnOrder::operator=(ColumnOrder&& other227) noexcept {
-  TYPE_ORDER = std::move(other227.TYPE_ORDER);
-  __isset = other227.__isset;
+ColumnOrder& ColumnOrder::operator=(ColumnOrder&& other231) noexcept {
+  TYPE_ORDER = std::move(other231.TYPE_ORDER);
+  __isset = other231.__isset;
   return *this;
 }
 void ColumnOrder::printTo(std::ostream& out) const {
@@ -6974,26 +7073,26 @@ void swap(PageLocation &a, PageLocation &b) {
   swap(a.first_row_index, b.first_row_index);
 }
 
-PageLocation::PageLocation(const PageLocation& other228) noexcept {
-  offset = other228.offset;
-  compressed_page_size = other228.compressed_page_size;
-  first_row_index = other228.first_row_index;
+PageLocation::PageLocation(const PageLocation& other232) noexcept {
+  offset = other232.offset;
+  compressed_page_size = other232.compressed_page_size;
+  first_row_index = other232.first_row_index;
 }
-PageLocation::PageLocation(PageLocation&& other229) noexcept {
-  offset = other229.offset;
-  compressed_page_size = other229.compressed_page_size;
-  first_row_index = other229.first_row_index;
+PageLocation::PageLocation(PageLocation&& other233) noexcept {
+  offset = other233.offset;
+  compressed_page_size = other233.compressed_page_size;
+  first_row_index = other233.first_row_index;
 }
-PageLocation& PageLocation::operator=(const PageLocation& other230) noexcept {
-  offset = other230.offset;
-  compressed_page_size = other230.compressed_page_size;
-  first_row_index = other230.first_row_index;
+PageLocation& PageLocation::operator=(const PageLocation& other234) noexcept {
+  offset = other234.offset;
+  compressed_page_size = other234.compressed_page_size;
+  first_row_index = other234.first_row_index;
   return *this;
 }
-PageLocation& PageLocation::operator=(PageLocation&& other231) noexcept {
-  offset = other231.offset;
-  compressed_page_size = other231.compressed_page_size;
-  first_row_index = other231.first_row_index;
+PageLocation& PageLocation::operator=(PageLocation&& other235) noexcept {
+  offset = other235.offset;
+  compressed_page_size = other235.compressed_page_size;
+  first_row_index = other235.first_row_index;
   return *this;
 }
 void PageLocation::printTo(std::ostream& out) const {
@@ -7046,14 +7145,14 @@ uint32_t OffsetIndex::read(::apache::thrift::protocol::TProtocol* iprot) {
         if (ftype == ::apache::thrift::protocol::T_LIST) {
           {
             this->page_locations.clear();
-            uint32_t _size232;
-            ::apache::thrift::protocol::TType _etype235;
-            xfer += iprot->readListBegin(_etype235, _size232);
-            this->page_locations.resize(_size232);
-            uint32_t _i236;
-            for (_i236 = 0; _i236 < _size232; ++_i236)
+            uint32_t _size236;
+            ::apache::thrift::protocol::TType _etype239;
+            xfer += iprot->readListBegin(_etype239, _size236);
+            this->page_locations.resize(_size236);
+            uint32_t _i240;
+            for (_i240 = 0; _i240 < _size236; ++_i240)
             {
-              xfer += this->page_locations[_i236].read(iprot);
+              xfer += this->page_locations[_i240].read(iprot);
             }
             xfer += iprot->readListEnd();
           }
@@ -7084,10 +7183,10 @@ uint32_t OffsetIndex::write(::apache::thrift::protocol::TProtocol* oprot) const
   xfer += oprot->writeFieldBegin("page_locations", ::apache::thrift::protocol::T_LIST, 1);
   {
     xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->page_locations.size()));
-    std::vector<PageLocation> ::const_iterator _iter237;
-    for (_iter237 = this->page_locations.begin(); _iter237 != this->page_locations.end(); ++_iter237)
+    std::vector<PageLocation> ::const_iterator _iter241;
+    for (_iter241 = this->page_locations.begin(); _iter241 != this->page_locations.end(); ++_iter241)
     {
-      xfer += (*_iter237).write(oprot);
+      xfer += (*_iter241).write(oprot);
     }
     xfer += oprot->writeListEnd();
   }
@@ -7103,18 +7202,18 @@ void swap(OffsetIndex &a, OffsetIndex &b) {
   swap(a.page_locations, b.page_locations);
 }
 
-OffsetIndex::OffsetIndex(const OffsetIndex& other238) {
-  page_locations = other238.page_locations;
+OffsetIndex::OffsetIndex(const OffsetIndex& other242) {
+  page_locations = other242.page_locations;
 }
-OffsetIndex::OffsetIndex(OffsetIndex&& other239) noexcept {
-  page_locations = std::move(other239.page_locations);
+OffsetIndex::OffsetIndex(OffsetIndex&& other243) noexcept {
+  page_locations = std::move(other243.page_locations);
 }
-OffsetIndex& OffsetIndex::operator=(const OffsetIndex& other240) {
-  page_locations = other240.page_locations;
+OffsetIndex& OffsetIndex::operator=(const OffsetIndex& other244) {
+  page_locations = other244.page_locations;
   return *this;
 }
-OffsetIndex& OffsetIndex::operator=(OffsetIndex&& other241) noexcept {
-  page_locations = std::move(other241.page_locations);
+OffsetIndex& OffsetIndex::operator=(OffsetIndex&& other245) noexcept {
+  page_locations = std::move(other245.page_locations);
   return *this;
 }
 void OffsetIndex::printTo(std::ostream& out) const {
@@ -7185,14 +7284,14 @@ uint32_t ColumnIndex::read(::apache::thrift::protocol::TProtocol* iprot) {
         if (ftype == ::apache::thrift::protocol::T_LIST) {
           {
             this->null_pages.clear();
-            uint32_t _size242;
-            ::apache::thrift::protocol::TType _etype245;
-            xfer += iprot->readListBegin(_etype245, _size242);
-            this->null_pages.resize(_size242);
-            uint32_t _i246;
-            for (_i246 = 0; _i246 < _size242; ++_i246)
+            uint32_t _size246;
+            ::apache::thrift::protocol::TType _etype249;
+            xfer += iprot->readListBegin(_etype249, _size246);
+            this->null_pages.resize(_size246);
+            uint32_t _i250;
+            for (_i250 = 0; _i250 < _size246; ++_i250)
             {
-              xfer += iprot->readBool(this->null_pages[_i246]);
+              xfer += iprot->readBool(this->null_pages[_i250]);
             }
             xfer += iprot->readListEnd();
           }
@@ -7205,14 +7304,14 @@ uint32_t ColumnIndex::read(::apache::thrift::protocol::TProtocol* iprot) {
         if (ftype == ::apache::thrift::protocol::T_LIST) {
           {
             this->min_values.clear();
-            uint32_t _size247;
-            ::apache::thrift::protocol::TType _etype250;
-            xfer += iprot->readListBegin(_etype250, _size247);
-            this->min_values.resize(_size247);
-            uint32_t _i251;
-            for (_i251 = 0; _i251 < _size247; ++_i251)
+            uint32_t _size251;
+            ::apache::thrift::protocol::TType _etype254;
+            xfer += iprot->readListBegin(_etype254, _size251);
+            this->min_values.resize(_size251);
+            uint32_t _i255;
+            for (_i255 = 0; _i255 < _size251; ++_i255)
             {
-              xfer += iprot->readBinary(this->min_values[_i251]);
+              xfer += iprot->readBinary(this->min_values[_i255]);
             }
             xfer += iprot->readListEnd();
           }
@@ -7225,14 +7324,14 @@ uint32_t ColumnIndex::read(::apache::thrift::protocol::TProtocol* iprot) {
         if (ftype == ::apache::thrift::protocol::T_LIST) {
           {
             this->max_values.clear();
-            uint32_t _size252;
-            ::apache::thrift::protocol::TType _etype255;
-            xfer += iprot->readListBegin(_etype255, _size252);
-            this->max_values.resize(_size252);
-            uint32_t _i256;
-            for (_i256 = 0; _i256 < _size252; ++_i256)
+            uint32_t _size256;
+            ::apache::thrift::protocol::TType _etype259;
+            xfer += iprot->readListBegin(_etype259, _size256);
+            this->max_values.resize(_size256);
+            uint32_t _i260;
+            for (_i260 = 0; _i260 < _size256; ++_i260)
             {
-              xfer += iprot->readBinary(this->max_values[_i256]);
+              xfer += iprot->readBinary(this->max_values[_i260]);
             }
             xfer += iprot->readListEnd();
           }
@@ -7243,9 +7342,9 @@ uint32_t ColumnIndex::read(::apache::thrift::protocol::TProtocol* iprot) {
         break;
       case 4:
         if (ftype == ::apache::thrift::protocol::T_I32) {
-          int32_t ecast257;
-          xfer += iprot->readI32(ecast257);
-          this->boundary_order = static_cast<BoundaryOrder::type>(ecast257);
+          int32_t ecast261;
+          xfer += iprot->readI32(ecast261);
+          this->boundary_order = static_cast<BoundaryOrder::type>(ecast261);
           isset_boundary_order = true;
         } else {
           xfer += iprot->skip(ftype);
@@ -7255,14 +7354,14 @@ uint32_t ColumnIndex::read(::apache::thrift::protocol::TProtocol* iprot) {
         if (ftype == ::apache::thrift::protocol::T_LIST) {
           {
             this->null_counts.clear();
-            uint32_t _size258;
-            ::apache::thrift::protocol::TType _etype261;
-            xfer += iprot->readListBegin(_etype261, _size258);
-            this->null_counts.resize(_size258);
-            uint32_t _i262;
-            for (_i262 = 0; _i262 < _size258; ++_i262)
+            uint32_t _size262;
+            ::apache::thrift::protocol::TType _etype265;
+            xfer += iprot->readListBegin(_etype265, _size262);
+            this->null_counts.resize(_size262);
+            uint32_t _i266;
+            for (_i266 = 0; _i266 < _size262; ++_i266)
             {
-              xfer += iprot->readI64(this->null_counts[_i262]);
+              xfer += iprot->readI64(this->null_counts[_i266]);
             }
             xfer += iprot->readListEnd();
           }
@@ -7299,10 +7398,10 @@ uint32_t ColumnIndex::write(::apache::thrift::protocol::TProtocol* oprot) const
   xfer += oprot->writeFieldBegin("null_pages", ::apache::thrift::protocol::T_LIST, 1);
   {
     xfer += oprot->writeListBegin(::apache::thrift::protocol::T_BOOL, static_cast<uint32_t>(this->null_pages.size()));
-    std::vector<bool> ::const_iterator _iter263;
-    for (_iter263 = this->null_pages.begin(); _iter263 != this->null_pages.end(); ++_iter263)
+    std::vector<bool> ::const_iterator _iter267;
+    for (_iter267 = this->null_pages.begin(); _iter267 != this->null_pages.end(); ++_iter267)
     {
-      xfer += oprot->writeBool((*_iter263));
+      xfer += oprot->writeBool((*_iter267));
     }
     xfer += oprot->writeListEnd();
   }
@@ -7311,10 +7410,10 @@ uint32_t ColumnIndex::write(::apache::thrift::protocol::TProtocol* oprot) const
   xfer += oprot->writeFieldBegin("min_values", ::apache::thrift::protocol::T_LIST, 2);
   {
     xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast<uint32_t>(this->min_values.size()));
-    std::vector<std::string> ::const_iterator _iter264;
-    for (_iter264 = this->min_values.begin(); _iter264 != this->min_values.end(); ++_iter264)
+    std::vector<std::string> ::const_iterator _iter268;
+    for (_iter268 = this->min_values.begin(); _iter268 != this->min_values.end(); ++_iter268)
     {
-      xfer += oprot->writeBinary((*_iter264));
+      xfer += oprot->writeBinary((*_iter268));
     }
     xfer += oprot->writeListEnd();
   }
@@ -7323,10 +7422,10 @@ uint32_t ColumnIndex::write(::apache::thrift::protocol::TProtocol* oprot) const
   xfer += oprot->writeFieldBegin("max_values", ::apache::thrift::protocol::T_LIST, 3);
   {
     xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast<uint32_t>(this->max_values.size()));
-    std::vector<std::string> ::const_iterator _iter265;
-    for (_iter265 = this->max_values.begin(); _iter265 != this->max_values.end(); ++_iter265)
+    std::vector<std::string> ::const_iterator _iter269;
+    for (_iter269 = this->max_values.begin(); _iter269 != this->max_values.end(); ++_iter269)
     {
-      xfer += oprot->writeBinary((*_iter265));
+      xfer += oprot->writeBinary((*_iter269));
     }
     xfer += oprot->writeListEnd();
   }
@@ -7340,10 +7439,10 @@ uint32_t ColumnIndex::write(::apache::thrift::protocol::TProtocol* oprot) const
     xfer += oprot->writeFieldBegin("null_counts", ::apache::thrift::protocol::T_LIST, 5);
     {
       xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast<uint32_t>(this->null_counts.size()));
-      std::vector<int64_t> ::const_iterator _iter266;
-      for (_iter266 = this->null_counts.begin(); _iter266 != this->null_counts.end(); ++_iter266)
+      std::vector<int64_t> ::const_iterator _iter270;
+      for (_iter270 = this->null_counts.begin(); _iter270 != this->null_counts.end(); ++_iter270)
       {
-        xfer += oprot->writeI64((*_iter266));
+        xfer += oprot->writeI64((*_iter270));
       }
       xfer += oprot->writeListEnd();
     }
@@ -7364,38 +7463,38 @@ void swap(ColumnIndex &a, ColumnIndex &b) {
   swap(a.__isset, b.__isset);
 }
 
-ColumnIndex::ColumnIndex(const ColumnIndex& other267) {
-  null_pages = other267.null_pages;
-  min_values = other267.min_values;
-  max_values = other267.max_values;
-  boundary_order = other267.boundary_order;
-  null_counts = other267.null_counts;
-  __isset = other267.__isset;
-}
-ColumnIndex::ColumnIndex(ColumnIndex&& other268) noexcept {
-  null_pages = std::move(other268.null_pages);
-  min_values = std::move(other268.min_values);
-  max_values = std::move(other268.max_values);
-  boundary_order = other268.boundary_order;
-  null_counts = std::move(other268.null_counts);
-  __isset = other268.__isset;
-}
-ColumnIndex& ColumnIndex::operator=(const ColumnIndex& other269) {
-  null_pages = other269.null_pages;
-  min_values = other269.min_values;
-  max_values = other269.max_values;
-  boundary_order = other269.boundary_order;
-  null_counts = other269.null_counts;
-  __isset = other269.__isset;
+ColumnIndex::ColumnIndex(const ColumnIndex& other271) {
+  null_pages = other271.null_pages;
+  min_values = other271.min_values;
+  max_values = other271.max_values;
+  boundary_order = other271.boundary_order;
+  null_counts = other271.null_counts;
+  __isset = other271.__isset;
+}
+ColumnIndex::ColumnIndex(ColumnIndex&& other272) noexcept {
+  null_pages = std::move(other272.null_pages);
+  min_values = std::move(other272.min_values);
+  max_values = std::move(other272.max_values);
+  boundary_order = other272.boundary_order;
+  null_counts = std::move(other272.null_counts);
+  __isset = other272.__isset;
+}
+ColumnIndex& ColumnIndex::operator=(const ColumnIndex& other273) {
+  null_pages = other273.null_pages;
+  min_values = other273.min_values;
+  max_values = other273.max_values;
+  boundary_order = other273.boundary_order;
+  null_counts = other273.null_counts;
+  __isset = other273.__isset;
   return *this;
 }
-ColumnIndex& ColumnIndex::operator=(ColumnIndex&& other270) noexcept {
-  null_pages = std::move(other270.null_pages);
-  min_values = std::move(other270.min_values);
-  max_values = std::move(other270.max_values);
-  boundary_order = other270.boundary_order;
-  null_counts = std::move(other270.null_counts);
-  __isset = other270.__isset;
+ColumnIndex& ColumnIndex::operator=(ColumnIndex&& other274) noexcept {
+  null_pages = std::move(other274.null_pages);
+  min_values = std::move(other274.min_values);
+  max_values = std::move(other274.max_values);
+  boundary_order = other274.boundary_order;
+  null_counts = std::move(other274.null_counts);
+  __isset = other274.__isset;
   return *this;
 }
 void ColumnIndex::printTo(std::ostream& out) const {
@@ -7525,30 +7624,30 @@ void swap(AesGcmV1 &a, AesGcmV1 &b) {
   swap(a.__isset, b.__isset);
 }
 
-AesGcmV1::AesGcmV1(const AesGcmV1& other271) {
-  aad_prefix = other271.aad_prefix;
-  aad_file_unique = other271.aad_file_unique;
-  supply_aad_prefix = other271.supply_aad_prefix;
-  __isset = other271.__isset;
+AesGcmV1::AesGcmV1(const AesGcmV1& other275) {
+  aad_prefix = other275.aad_prefix;
+  aad_file_unique = other275.aad_file_unique;
+  supply_aad_prefix = other275.supply_aad_prefix;
+  __isset = other275.__isset;
 }
-AesGcmV1::AesGcmV1(AesGcmV1&& other272) noexcept {
-  aad_prefix = std::move(other272.aad_prefix);
-  aad_file_unique = std::move(other272.aad_file_unique);
-  supply_aad_prefix = other272.supply_aad_prefix;
-  __isset = other272.__isset;
+AesGcmV1::AesGcmV1(AesGcmV1&& other276) noexcept {
+  aad_prefix = std::move(other276.aad_prefix);
+  aad_file_unique = std::move(other276.aad_file_unique);
+  supply_aad_prefix = other276.supply_aad_prefix;
+  __isset = other276.__isset;
 }
-AesGcmV1& AesGcmV1::operator=(const AesGcmV1& other273) {
-  aad_prefix = other273.aad_prefix;
-  aad_file_unique = other273.aad_file_unique;
-  supply_aad_prefix = other273.supply_aad_prefix;
-  __isset = other273.__isset;
+AesGcmV1& AesGcmV1::operator=(const AesGcmV1& other277) {
+  aad_prefix = other277.aad_prefix;
+  aad_file_unique = other277.aad_file_unique;
+  supply_aad_prefix = other277.supply_aad_prefix;
+  __isset = other277.__isset;
   return *this;
 }
-AesGcmV1& AesGcmV1::operator=(AesGcmV1&& other274) noexcept {
-  aad_prefix = std::move(other274.aad_prefix);
-  aad_file_unique = std::move(other274.aad_file_unique);
-  supply_aad_prefix = other274.supply_aad_prefix;
-  __isset = other274.__isset;
+AesGcmV1& AesGcmV1::operator=(AesGcmV1&& other278) noexcept {
+  aad_prefix = std::move(other278.aad_prefix);
+  aad_file_unique = std::move(other278.aad_file_unique);
+  supply_aad_prefix = other278.supply_aad_prefix;
+  __isset = other278.__isset;
   return *this;
 }
 void AesGcmV1::printTo(std::ostream& out) const {
@@ -7676,30 +7775,30 @@ void swap(AesGcmCtrV1 &a, AesGcmCtrV1 &b) {
   swap(a.__isset, b.__isset);
 }
 
-AesGcmCtrV1::AesGcmCtrV1(const AesGcmCtrV1& other275) {
-  aad_prefix = other275.aad_prefix;
-  aad_file_unique = other275.aad_file_unique;
-  supply_aad_prefix = other275.supply_aad_prefix;
-  __isset = other275.__isset;
+AesGcmCtrV1::AesGcmCtrV1(const AesGcmCtrV1& other279) {
+  aad_prefix = other279.aad_prefix;
+  aad_file_unique = other279.aad_file_unique;
+  supply_aad_prefix = other279.supply_aad_prefix;
+  __isset = other279.__isset;
 }
-AesGcmCtrV1::AesGcmCtrV1(AesGcmCtrV1&& other276) noexcept {
-  aad_prefix = std::move(other276.aad_prefix);
-  aad_file_unique = std::move(other276.aad_file_unique);
-  supply_aad_prefix = other276.supply_aad_prefix;
-  __isset = other276.__isset;
+AesGcmCtrV1::AesGcmCtrV1(AesGcmCtrV1&& other280) noexcept {
+  aad_prefix = std::move(other280.aad_prefix);
+  aad_file_unique = std::move(other280.aad_file_unique);
+  supply_aad_prefix = other280.supply_aad_prefix;
+  __isset = other280.__isset;
 }
-AesGcmCtrV1& AesGcmCtrV1::operator=(const AesGcmCtrV1& other277) {
-  aad_prefix = other277.aad_prefix;
-  aad_file_unique = other277.aad_file_unique;
-  supply_aad_prefix = other277.supply_aad_prefix;
-  __isset = other277.__isset;
+AesGcmCtrV1& AesGcmCtrV1::operator=(const AesGcmCtrV1& other281) {
+  aad_prefix = other281.aad_prefix;
+  aad_file_unique = other281.aad_file_unique;
+  supply_aad_prefix = other281.supply_aad_prefix;
+  __isset = other281.__isset;
   return *this;
 }
-AesGcmCtrV1& AesGcmCtrV1::operator=(AesGcmCtrV1&& other278) noexcept {
-  aad_prefix = std::move(other278.aad_prefix);
-  aad_file_unique = std::move(other278.aad_file_unique);
-  supply_aad_prefix = other278.supply_aad_prefix;
-  __isset = other278.__isset;
+AesGcmCtrV1& AesGcmCtrV1::operator=(AesGcmCtrV1&& other282) noexcept {
+  aad_prefix = std::move(other282.aad_prefix);
+  aad_file_unique = std::move(other282.aad_file_unique);
+  supply_aad_prefix = other282.supply_aad_prefix;
+  __isset = other282.__isset;
   return *this;
 }
 void AesGcmCtrV1::printTo(std::ostream& out) const {
@@ -7808,26 +7907,26 @@ void swap(EncryptionAlgorithm &a, EncryptionAlgorithm &b) {
   swap(a.__isset, b.__isset);
 }
 
-EncryptionAlgorithm::EncryptionAlgorithm(const EncryptionAlgorithm& other279) {
-  AES_GCM_V1 = other279.AES_GCM_V1;
-  AES_GCM_CTR_V1 = other279.AES_GCM_CTR_V1;
-  __isset = other279.__isset;
+EncryptionAlgorithm::EncryptionAlgorithm(const EncryptionAlgorithm& other283) {
+  AES_GCM_V1 = other283.AES_GCM_V1;
+  AES_GCM_CTR_V1 = other283.AES_GCM_CTR_V1;
+  __isset = other283.__isset;
 }
-EncryptionAlgorithm::EncryptionAlgorithm(EncryptionAlgorithm&& other280) noexcept {
-  AES_GCM_V1 = std::move(other280.AES_GCM_V1);
-  AES_GCM_CTR_V1 = std::move(other280.AES_GCM_CTR_V1);
-  __isset = other280.__isset;
+EncryptionAlgorithm::EncryptionAlgorithm(EncryptionAlgorithm&& other284) noexcept {
+  AES_GCM_V1 = std::move(other284.AES_GCM_V1);
+  AES_GCM_CTR_V1 = std::move(other284.AES_GCM_CTR_V1);
+  __isset = other284.__isset;
 }
-EncryptionAlgorithm& EncryptionAlgorithm::operator=(const EncryptionAlgorithm& other281) {
-  AES_GCM_V1 = other281.AES_GCM_V1;
-  AES_GCM_CTR_V1 = other281.AES_GCM_CTR_V1;
-  __isset = other281.__isset;
+EncryptionAlgorithm& EncryptionAlgorithm::operator=(const EncryptionAlgorithm& other285) {
+  AES_GCM_V1 = other285.AES_GCM_V1;
+  AES_GCM_CTR_V1 = other285.AES_GCM_CTR_V1;
+  __isset = other285.__isset;
   return *this;
 }
-EncryptionAlgorithm& EncryptionAlgorithm::operator=(EncryptionAlgorithm&& other282) noexcept {
-  AES_GCM_V1 = std::move(other282.AES_GCM_V1);
-  AES_GCM_CTR_V1 = std::move(other282.AES_GCM_CTR_V1);
-  __isset = other282.__isset;
+EncryptionAlgorithm& EncryptionAlgorithm::operator=(EncryptionAlgorithm&& other286) noexcept {
+  AES_GCM_V1 = std::move(other286.AES_GCM_V1);
+  AES_GCM_CTR_V1 = std::move(other286.AES_GCM_CTR_V1);
+  __isset = other286.__isset;
   return *this;
 }
 void EncryptionAlgorithm::printTo(std::ostream& out) const {
@@ -7927,14 +8026,14 @@ uint32_t FileMetaData::read(::apache::thrift::protocol::TProtocol* iprot) {
         if (ftype == ::apache::thrift::protocol::T_LIST) {
           {
             this->schema.clear();
-            uint32_t _size283;
-            ::apache::thrift::protocol::TType _etype286;
-            xfer += iprot->readListBegin(_etype286, _size283);
-            this->schema.resize(_size283);
-            uint32_t _i287;
-            for (_i287 = 0; _i287 < _size283; ++_i287)
+            uint32_t _size287;
+            ::apache::thrift::protocol::TType _etype290;
+            xfer += iprot->readListBegin(_etype290, _size287);
+            this->schema.resize(_size287);
+            uint32_t _i291;
+            for (_i291 = 0; _i291 < _size287; ++_i291)
             {
-              xfer += this->schema[_i287].read(iprot);
+              xfer += this->schema[_i291].read(iprot);
             }
             xfer += iprot->readListEnd();
           }
@@ -7955,14 +8054,14 @@ uint32_t FileMetaData::read(::apache::thrift::protocol::TProtocol* iprot) {
         if (ftype == ::apache::thrift::protocol::T_LIST) {
           {
             this->row_groups.clear();
-            uint32_t _size288;
-            ::apache::thrift::protocol::TType _etype291;
-            xfer += iprot->readListBegin(_etype291, _size288);
-            this->row_groups.resize(_size288);
-            uint32_t _i292;
-            for (_i292 = 0; _i292 < _size288; ++_i292)
+            uint32_t _size292;
+            ::apache::thrift::protocol::TType _etype295;
+            xfer += iprot->readListBegin(_etype295, _size292);
+            this->row_groups.resize(_size292);
+            uint32_t _i296;
+            for (_i296 = 0; _i296 < _size292; ++_i296)
             {
-              xfer += this->row_groups[_i292].read(iprot);
+              xfer += this->row_groups[_i296].read(iprot);
             }
             xfer += iprot->readListEnd();
           }
@@ -7975,14 +8074,14 @@ uint32_t FileMetaData::read(::apache::thrift::protocol::TProtocol* iprot) {
         if (ftype == ::apache::thrift::protocol::T_LIST) {
           {
             this->key_value_metadata.clear();
-            uint32_t _size293;
-            ::apache::thrift::protocol::TType _etype296;
-            xfer += iprot->readListBegin(_etype296, _size293);
-            this->key_value_metadata.resize(_size293);
-            uint32_t _i297;
-            for (_i297 = 0; _i297 < _size293; ++_i297)
+            uint32_t _size297;
+            ::apache::thrift::protocol::TType _etype300;
+            xfer += iprot->readListBegin(_etype300, _size297);
+            this->key_value_metadata.resize(_size297);
+            uint32_t _i301;
+            for (_i301 = 0; _i301 < _size297; ++_i301)
             {
-              xfer += this->key_value_metadata[_i297].read(iprot);
+              xfer += this->key_value_metadata[_i301].read(iprot);
             }
             xfer += iprot->readListEnd();
           }
@@ -8003,14 +8102,14 @@ uint32_t FileMetaData::read(::apache::thrift::protocol::TProtocol* iprot) {
         if (ftype == ::apache::thrift::protocol::T_LIST) {
           {
             this->column_orders.clear();
-            uint32_t _size298;
-            ::apache::thrift::protocol::TType _etype301;
-            xfer += iprot->readListBegin(_etype301, _size298);
-            this->column_orders.resize(_size298);
-            uint32_t _i302;
-            for (_i302 = 0; _i302 < _size298; ++_i302)
+            uint32_t _size302;
+            ::apache::thrift::protocol::TType _etype305;
+            xfer += iprot->readListBegin(_etype305, _size302);
+            this->column_orders.resize(_size302);
+            uint32_t _i306;
+            for (_i306 = 0; _i306 < _size302; ++_i306)
             {
-              xfer += this->column_orders[_i302].read(iprot);
+              xfer += this->column_orders[_i306].read(iprot);
             }
             xfer += iprot->readListEnd();
           }
@@ -8067,10 +8166,10 @@ uint32_t FileMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const
   xfer += oprot->writeFieldBegin("schema", ::apache::thrift::protocol::T_LIST, 2);
   {
     xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->schema.size()));
-    std::vector<SchemaElement> ::const_iterator _iter303;
-    for (_iter303 = this->schema.begin(); _iter303 != this->schema.end(); ++_iter303)
+    std::vector<SchemaElement> ::const_iterator _iter307;
+    for (_iter307 = this->schema.begin(); _iter307 != this->schema.end(); ++_iter307)
     {
-      xfer += (*_iter303).write(oprot);
+      xfer += (*_iter307).write(oprot);
     }
     xfer += oprot->writeListEnd();
   }
@@ -8083,10 +8182,10 @@ uint32_t FileMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const
   xfer += oprot->writeFieldBegin("row_groups", ::apache::thrift::protocol::T_LIST, 4);
   {
     xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->row_groups.size()));
-    std::vector<RowGroup> ::const_iterator _iter304;
-    for (_iter304 = this->row_groups.begin(); _iter304 != this->row_groups.end(); ++_iter304)
+    std::vector<RowGroup> ::const_iterator _iter308;
+    for (_iter308 = this->row_groups.begin(); _iter308 != this->row_groups.end(); ++_iter308)
     {
-      xfer += (*_iter304).write(oprot);
+      xfer += (*_iter308).write(oprot);
     }
     xfer += oprot->writeListEnd();
   }
@@ -8096,10 +8195,10 @@ uint32_t FileMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const
     xfer += oprot->writeFieldBegin("key_value_metadata", ::apache::thrift::protocol::T_LIST, 5);
     {
       xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->key_value_metadata.size()));
-      std::vector<KeyValue> ::const_iterator _iter305;
-      for (_iter305 = this->key_value_metadata.begin(); _iter305 != this->key_value_metadata.end(); ++_iter305)
+      std::vector<KeyValue> ::const_iterator _iter309;
+      for (_iter309 = this->key_value_metadata.begin(); _iter309 != this->key_value_metadata.end(); ++_iter309)
       {
-        xfer += (*_iter305).write(oprot);
+        xfer += (*_iter309).write(oprot);
       }
       xfer += oprot->writeListEnd();
     }
@@ -8114,10 +8213,10 @@ uint32_t FileMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const
     xfer += oprot->writeFieldBegin("column_orders", ::apache::thrift::protocol::T_LIST, 7);
     {
       xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->column_orders.size()));
-      std::vector<ColumnOrder> ::const_iterator _iter306;
-      for (_iter306 = this->column_orders.begin(); _iter306 != this->column_orders.end(); ++_iter306)
+      std::vector<ColumnOrder> ::const_iterator _iter310;
+      for (_iter310 = this->column_orders.begin(); _iter310 != this->column_orders.end(); ++_iter310)
       {
-        xfer += (*_iter306).write(oprot);
+        xfer += (*_iter310).write(oprot);
       }
       xfer += oprot->writeListEnd();
     }
@@ -8152,54 +8251,54 @@ void swap(FileMetaData &a, FileMetaData &b) {
   swap(a.__isset, b.__isset);
 }
 
-FileMetaData::FileMetaData(const FileMetaData& other307) {
-  version = other307.version;
-  schema = other307.schema;
-  num_rows = other307.num_rows;
-  row_groups = other307.row_groups;
-  key_value_metadata = other307.key_value_metadata;
-  created_by = other307.created_by;
-  column_orders = other307.column_orders;
-  encryption_algorithm = other307.encryption_algorithm;
-  footer_signing_key_metadata = other307.footer_signing_key_metadata;
-  __isset = other307.__isset;
-}
-FileMetaData::FileMetaData(FileMetaData&& other308) noexcept {
-  version = other308.version;
-  schema = std::move(other308.schema);
-  num_rows = other308.num_rows;
-  row_groups = std::move(other308.row_groups);
-  key_value_metadata = std::move(other308.key_value_metadata);
-  created_by = std::move(other308.created_by);
-  column_orders = std::move(other308.column_orders);
-  encryption_algorithm = std::move(other308.encryption_algorithm);
-  footer_signing_key_metadata = std::move(other308.footer_signing_key_metadata);
-  __isset = other308.__isset;
-}
-FileMetaData& FileMetaData::operator=(const FileMetaData& other309) {
-  version = other309.version;
-  schema = other309.schema;
-  num_rows = other309.num_rows;
-  row_groups = other309.row_groups;
-  key_value_metadata = other309.key_value_metadata;
-  created_by = other309.created_by;
-  column_orders = other309.column_orders;
-  encryption_algorithm = other309.encryption_algorithm;
-  footer_signing_key_metadata = other309.footer_signing_key_metadata;
-  __isset = other309.__isset;
+FileMetaData::FileMetaData(const FileMetaData& other311) {
+  version = other311.version;
+  schema = other311.schema;
+  num_rows = other311.num_rows;
+  row_groups = other311.row_groups;
+  key_value_metadata = other311.key_value_metadata;
+  created_by = other311.created_by;
+  column_orders = other311.column_orders;
+  encryption_algorithm = other311.encryption_algorithm;
+  footer_signing_key_metadata = other311.footer_signing_key_metadata;
+  __isset = other311.__isset;
+}
+FileMetaData::FileMetaData(FileMetaData&& other312) noexcept {
+  version = other312.version;
+  schema = std::move(other312.schema);
+  num_rows = other312.num_rows;
+  row_groups = std::move(other312.row_groups);
+  key_value_metadata = std::move(other312.key_value_metadata);
+  created_by = std::move(other312.created_by);
+  column_orders = std::move(other312.column_orders);
+  encryption_algorithm = std::move(other312.encryption_algorithm);
+  footer_signing_key_metadata = std::move(other312.footer_signing_key_metadata);
+  __isset = other312.__isset;
+}
+FileMetaData& FileMetaData::operator=(const FileMetaData& other313) {
+  version = other313.version;
+  schema = other313.schema;
+  num_rows = other313.num_rows;
+  row_groups = other313.row_groups;
+  key_value_metadata = other313.key_value_metadata;
+  created_by = other313.created_by;
+  column_orders = other313.column_orders;
+  encryption_algorithm = other313.encryption_algorithm;
+  footer_signing_key_metadata = other313.footer_signing_key_metadata;
+  __isset = other313.__isset;
   return *this;
 }
-FileMetaData& FileMetaData::operator=(FileMetaData&& other310) noexcept {
-  version = other310.version;
-  schema = std::move(other310.schema);
-  num_rows = other310.num_rows;
-  row_groups = std::move(other310.row_groups);
-  key_value_metadata = std::move(other310.key_value_metadata);
-  created_by = std::move(other310.created_by);
-  column_orders = std::move(other310.column_orders);
-  encryption_algorithm = std::move(other310.encryption_algorithm);
-  footer_signing_key_metadata = std::move(other310.footer_signing_key_metadata);
-  __isset = other310.__isset;
+FileMetaData& FileMetaData::operator=(FileMetaData&& other314) noexcept {
+  version = other314.version;
+  schema = std::move(other314.schema);
+  num_rows = other314.num_rows;
+  row_groups = std::move(other314.row_groups);
+  key_value_metadata = std::move(other314.key_value_metadata);
+  created_by = std::move(other314.created_by);
+  column_orders = std::move(other314.column_orders);
+  encryption_algorithm = std::move(other314.encryption_algorithm);
+  footer_signing_key_metadata = std::move(other314.footer_signing_key_metadata);
+  __isset = other314.__isset;
   return *this;
 }
 void FileMetaData::printTo(std::ostream& out) const {
@@ -8315,26 +8414,26 @@ void swap(FileCryptoMetaData &a, FileCryptoMetaData &b) {
   swap(a.__isset, b.__isset);
 }
 
-FileCryptoMetaData::FileCryptoMetaData(const FileCryptoMetaData& other311) {
-  encryption_algorithm = other311.encryption_algorithm;
-  key_metadata = other311.key_metadata;
-  __isset = other311.__isset;
+FileCryptoMetaData::FileCryptoMetaData(const FileCryptoMetaData& other315) {
+  encryption_algorithm = other315.encryption_algorithm;
+  key_metadata = other315.key_metadata;
+  __isset = other315.__isset;
 }
-FileCryptoMetaData::FileCryptoMetaData(FileCryptoMetaData&& other312) noexcept {
-  encryption_algorithm = std::move(other312.encryption_algorithm);
-  key_metadata = std::move(other312.key_metadata);
-  __isset = other312.__isset;
+FileCryptoMetaData::FileCryptoMetaData(FileCryptoMetaData&& other316) noexcept {
+  encryption_algorithm = std::move(other316.encryption_algorithm);
+  key_metadata = std::move(other316.key_metadata);
+  __isset = other316.__isset;
 }
-FileCryptoMetaData& FileCryptoMetaData::operator=(const FileCryptoMetaData& other313) {
-  encryption_algorithm = other313.encryption_algorithm;
-  key_metadata = other313.key_metadata;
-  __isset = other313.__isset;
+FileCryptoMetaData& FileCryptoMetaData::operator=(const FileCryptoMetaData& other317) {
+  encryption_algorithm = other317.encryption_algorithm;
+  key_metadata = other317.key_metadata;
+  __isset = other317.__isset;
   return *this;
 }
-FileCryptoMetaData& FileCryptoMetaData::operator=(FileCryptoMetaData&& other314) noexcept {
-  encryption_algorithm = std::move(other314.encryption_algorithm);
-  key_metadata = std::move(other314.key_metadata);
-  __isset = other314.__isset;
+FileCryptoMetaData& FileCryptoMetaData::operator=(FileCryptoMetaData&& other318) noexcept {
+  encryption_algorithm = std::move(other318.encryption_algorithm);
+  key_metadata = std::move(other318.key_metadata);
+  __isset = other318.__isset;
   return *this;
 }
 void FileCryptoMetaData::printTo(std::ostream& out) const {
diff --git a/cpp/src/generated/parquet_types.h b/cpp/src/generated/parquet_types.h
index 9f468b5051db3..199b4ae747667 100644
--- a/cpp/src/generated/parquet_types.h
+++ b/cpp/src/generated/parquet_types.h
@@ -359,6 +359,8 @@ class EnumType;
 
 class DateType;
 
+class Float16Type;
+
 class NullType;
 
 class DecimalType;
@@ -770,6 +772,39 @@ void swap(DateType &a, DateType &b);
 std::ostream& operator<<(std::ostream& out, const DateType& obj);
 
 
+class Float16Type : public virtual ::apache::thrift::TBase {
+ public:
+
+  Float16Type(const Float16Type&) noexcept;
+  Float16Type(Float16Type&&) noexcept;
+  Float16Type& operator=(const Float16Type&) noexcept;
+  Float16Type& operator=(Float16Type&&) noexcept;
+  Float16Type() noexcept {
+  }
+
+  virtual ~Float16Type() noexcept;
+
+  bool operator == (const Float16Type & /* rhs */) const
+  {
+    return true;
+  }
+  bool operator != (const Float16Type &rhs) const {
+    return !(*this == rhs);
+  }
+
+  bool operator < (const Float16Type & ) const;
+
+  uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override;
+  uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override;
+
+  virtual void printTo(std::ostream& out) const;
+};
+
+void swap(Float16Type &a, Float16Type &b);
+
+std::ostream& operator<<(std::ostream& out, const Float16Type& obj);
+
+
 /**
  * Logical type to annotate a column that is always null.
  * 
@@ -1253,7 +1288,7 @@ void swap(BsonType &a, BsonType &b);
 std::ostream& operator<<(std::ostream& out, const BsonType& obj);
 
 typedef struct _LogicalType__isset {
-  _LogicalType__isset() : STRING(false), MAP(false), LIST(false), ENUM(false), DECIMAL(false), DATE(false), TIME(false), TIMESTAMP(false), INTEGER(false), UNKNOWN(false), JSON(false), BSON(false), UUID(false) {}
+  _LogicalType__isset() : STRING(false), MAP(false), LIST(false), ENUM(false), DECIMAL(false), DATE(false), TIME(false), TIMESTAMP(false), INTEGER(false), UNKNOWN(false), JSON(false), BSON(false), UUID(false), FLOAT16(false) {}
   bool STRING :1;
   bool MAP :1;
   bool LIST :1;
@@ -1267,6 +1302,7 @@ typedef struct _LogicalType__isset {
   bool JSON :1;
   bool BSON :1;
   bool UUID :1;
+  bool FLOAT16 :1;
 } _LogicalType__isset;
 
 /**
@@ -1300,6 +1336,7 @@ class LogicalType : public virtual ::apache::thrift::TBase {
   JsonType JSON;
   BsonType BSON;
   UUIDType UUID;
+  Float16Type FLOAT16;
 
   _LogicalType__isset __isset;
 
@@ -1329,6 +1366,8 @@ class LogicalType : public virtual ::apache::thrift::TBase {
 
   void __set_UUID(const UUIDType& val);
 
+  void __set_FLOAT16(const Float16Type& val);
+
   bool operator == (const LogicalType & rhs) const
   {
     if (__isset.STRING != rhs.__isset.STRING)
@@ -1383,6 +1422,10 @@ class LogicalType : public virtual ::apache::thrift::TBase {
       return false;
     else if (__isset.UUID && !(UUID == rhs.UUID))
       return false;
+    if (__isset.FLOAT16 != rhs.__isset.FLOAT16)
+      return false;
+    else if (__isset.FLOAT16 && !(FLOAT16 == rhs.FLOAT16))
+      return false;
     return true;
   }
   bool operator != (const LogicalType &rhs) const {
diff --git a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
index 4e23d0fab5c69..fb9e53870583c 100644
--- a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
+++ b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
@@ -143,6 +143,8 @@ std::shared_ptr<const LogicalType> get_logical_type(const DataType& type) {
       return LogicalType::Date();
     case ArrowId::DATE64:
       return LogicalType::Date();
+    case ArrowId::HALF_FLOAT:
+      return LogicalType::Float16();
     case ArrowId::TIMESTAMP: {
       const auto& ts_type = static_cast<const ::arrow::TimestampType&>(type);
       const bool adjusted_to_utc = !(ts_type.timezone().empty());
@@ -220,6 +222,7 @@ ParquetType::type get_physical_type(const DataType& type) {
     case ArrowId::FIXED_SIZE_BINARY:
     case ArrowId::DECIMAL128:
     case ArrowId::DECIMAL256:
+    case ArrowId::HALF_FLOAT:
       return ParquetType::FIXED_LEN_BYTE_ARRAY;
     case ArrowId::DATE32:
       return ParquetType::INT32;
@@ -525,6 +528,9 @@ static std::shared_ptr<GroupNode> MakeSimpleSchema(const DataType& type,
           byte_width =
               static_cast<const ::arrow::FixedSizeBinaryType&>(values_type).byte_width();
           break;
+        case ::arrow::Type::HALF_FLOAT:
+          byte_width = sizeof(::arrow::HalfFloatType::c_type);
+          break;
         case ::arrow::Type::DECIMAL128:
         case ::arrow::Type::DECIMAL256: {
           const auto& decimal_type = static_cast<const DecimalType&>(values_type);
@@ -537,6 +543,9 @@ static std::shared_ptr<GroupNode> MakeSimpleSchema(const DataType& type,
     case ::arrow::Type::FIXED_SIZE_BINARY:
       byte_width = static_cast<const ::arrow::FixedSizeBinaryType&>(type).byte_width();
       break;
+    case ::arrow::Type::HALF_FLOAT:
+      byte_width = sizeof(::arrow::HalfFloatType::c_type);
+      break;
     case ::arrow::Type::DECIMAL128:
     case ::arrow::Type::DECIMAL256: {
       const auto& decimal_type = static_cast<const DecimalType&>(type);
@@ -840,12 +849,12 @@ typedef ::testing::Types<
     ::arrow::BooleanType, ::arrow::UInt8Type, ::arrow::Int8Type, ::arrow::UInt16Type,
     ::arrow::Int16Type, ::arrow::Int32Type, ::arrow::UInt64Type, ::arrow::Int64Type,
     ::arrow::Date32Type, ::arrow::FloatType, ::arrow::DoubleType, ::arrow::StringType,
-    ::arrow::BinaryType, ::arrow::FixedSizeBinaryType, DecimalWithPrecisionAndScale<1>,
-    DecimalWithPrecisionAndScale<5>, DecimalWithPrecisionAndScale<10>,
-    DecimalWithPrecisionAndScale<19>, DecimalWithPrecisionAndScale<23>,
-    DecimalWithPrecisionAndScale<27>, DecimalWithPrecisionAndScale<38>,
-    Decimal256WithPrecisionAndScale<39>, Decimal256WithPrecisionAndScale<56>,
-    Decimal256WithPrecisionAndScale<76>>
+    ::arrow::BinaryType, ::arrow::FixedSizeBinaryType, ::arrow::HalfFloatType,
+    DecimalWithPrecisionAndScale<1>, DecimalWithPrecisionAndScale<5>,
+    DecimalWithPrecisionAndScale<10>, DecimalWithPrecisionAndScale<19>,
+    DecimalWithPrecisionAndScale<23>, DecimalWithPrecisionAndScale<27>,
+    DecimalWithPrecisionAndScale<38>, Decimal256WithPrecisionAndScale<39>,
+    Decimal256WithPrecisionAndScale<56>, Decimal256WithPrecisionAndScale<76>>
     TestTypes;
 
 TYPED_TEST_SUITE(TestParquetIO, TestTypes);
@@ -916,9 +925,15 @@ TYPED_TEST(TestParquetIO, SingleColumnOptionalReadWrite) {
 }
 
 TYPED_TEST(TestParquetIO, SingleColumnOptionalDictionaryWrite) {
-  // Skip tests for BOOL as we don't create dictionaries for it.
-  if (TypeParam::type_id == ::arrow::Type::BOOL) {
-    return;
+  switch (TypeParam::type_id) {
+    case ::arrow::Type::BOOL:
+      GTEST_SKIP() << "dictionaries not created for BOOL";
+      break;
+    case ::arrow::Type::HALF_FLOAT:
+      GTEST_SKIP() << "dictionary_encode not supported for HALF_FLOAT";
+      break;
+    default:
+      break;
   }
 
   std::shared_ptr<Array> values;
diff --git a/cpp/src/parquet/arrow/arrow_schema_test.cc b/cpp/src/parquet/arrow/arrow_schema_test.cc
index f11101eb24298..5443214f930d7 100644
--- a/cpp/src/parquet/arrow/arrow_schema_test.cc
+++ b/cpp/src/parquet/arrow/arrow_schema_test.cc
@@ -236,6 +236,8 @@ TEST_F(TestConvertParquetSchema, ParquetAnnotatedFields) {
        ::arrow::fixed_size_binary(12)},
       {"uuid", LogicalType::UUID(), ParquetType::FIXED_LEN_BYTE_ARRAY, 16,
        ::arrow::fixed_size_binary(16)},
+      {"float16", LogicalType::Float16(), ParquetType::FIXED_LEN_BYTE_ARRAY, 2,
+       ::arrow::float16()},
       {"none", LogicalType::None(), ParquetType::BOOLEAN, -1, ::arrow::boolean()},
       {"none", LogicalType::None(), ParquetType::INT32, -1, ::arrow::int32()},
       {"none", LogicalType::None(), ParquetType::INT64, -1, ::arrow::int64()},
@@ -851,6 +853,8 @@ TEST_F(TestConvertArrowSchema, ArrowFields) {
        ParquetType::FIXED_LEN_BYTE_ARRAY, 7},
       {"decimal(32, 8)", ::arrow::decimal(32, 8), LogicalType::Decimal(32, 8),
        ParquetType::FIXED_LEN_BYTE_ARRAY, 14},
+      {"float16", ::arrow::float16(), LogicalType::Float16(),
+       ParquetType::FIXED_LEN_BYTE_ARRAY, 2},
       {"time32", ::arrow::time32(::arrow::TimeUnit::MILLI),
        LogicalType::Time(true, LogicalType::TimeUnit::MILLIS), ParquetType::INT32, -1},
       {"time64(microsecond)", ::arrow::time64(::arrow::TimeUnit::MICRO),
@@ -913,7 +917,8 @@ TEST_F(TestConvertArrowSchema, ArrowNonconvertibleFields) {
   };
 
   std::vector<FieldConstructionArguments> cases = {
-      {"float16", ::arrow::float16()},
+      {"run_end_encoded",
+       ::arrow::run_end_encoded(::arrow::int32(), ::arrow::list(::arrow::int8()))},
   };
 
   for (const FieldConstructionArguments& c : cases) {
diff --git a/cpp/src/parquet/arrow/reader_internal.cc b/cpp/src/parquet/arrow/reader_internal.cc
index 5146aa12c2c36..e5aef5a45b5f3 100644
--- a/cpp/src/parquet/arrow/reader_internal.cc
+++ b/cpp/src/parquet/arrow/reader_internal.cc
@@ -42,6 +42,7 @@
 #include "arrow/util/bit_util.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/endian.h"
+#include "arrow/util/float16.h"
 #include "arrow/util/int_util_overflow.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/ubsan.h"
@@ -82,6 +83,7 @@ using ::arrow::bit_util::FromBigEndian;
 using ::arrow::internal::checked_cast;
 using ::arrow::internal::checked_pointer_cast;
 using ::arrow::internal::SafeLeftShift;
+using ::arrow::util::Float16;
 using ::arrow::util::SafeLoadAs;
 
 using parquet::internal::BinaryRecordReader;
@@ -713,6 +715,17 @@ Status TransferDecimal(RecordReader* reader, MemoryPool* pool,
   return Status::OK();
 }
 
+Status TransferHalfFloat(RecordReader* reader, MemoryPool* pool,
+                         const std::shared_ptr<Field>& field, Datum* out) {
+  static const auto binary_type = ::arrow::fixed_size_binary(2);
+  // Read as a FixedSizeBinaryArray - then, view as a HalfFloatArray
+  std::shared_ptr<ChunkedArray> chunked_array;
+  RETURN_NOT_OK(
+      TransferBinary(reader, pool, field->WithType(binary_type), &chunked_array));
+  ARROW_ASSIGN_OR_RAISE(*out, chunked_array->View(field->type()));
+  return Status::OK();
+}
+
 }  // namespace
 
 #define TRANSFER_INT32(ENUM, ArrowType)                                               \
@@ -772,6 +785,18 @@ Status TransferColumnData(RecordReader* reader, const std::shared_ptr<Field>& va
       RETURN_NOT_OK(TransferBinary(reader, pool, value_field, &chunked_result));
       result = chunked_result;
     } break;
+    case ::arrow::Type::HALF_FLOAT: {
+      const auto& type = *value_field->type();
+      if (descr->physical_type() != ::parquet::Type::FIXED_LEN_BYTE_ARRAY) {
+        return Status::Invalid("Physical type for ", type.ToString(),
+                               " must be fixed length binary");
+      }
+      if (descr->type_length() != type.byte_width()) {
+        return Status::Invalid("Fixed length binary type for ", type.ToString(),
+                               " must have a byte width of ", type.byte_width());
+      }
+      RETURN_NOT_OK(TransferHalfFloat(reader, pool, value_field, &result));
+    } break;
     case ::arrow::Type::DECIMAL128: {
       switch (descr->physical_type()) {
         case ::parquet::Type::INT32: {
diff --git a/cpp/src/parquet/arrow/schema.cc b/cpp/src/parquet/arrow/schema.cc
index 3323b7ff8b608..f5484f131eb07 100644
--- a/cpp/src/parquet/arrow/schema.cc
+++ b/cpp/src/parquet/arrow/schema.cc
@@ -397,6 +397,11 @@ Status FieldToNode(const std::string& name, const std::shared_ptr<Field>& field,
     case ArrowTypeId::DURATION:
       type = ParquetType::INT64;
       break;
+    case ArrowTypeId::HALF_FLOAT:
+      type = ParquetType::FIXED_LEN_BYTE_ARRAY;
+      logical_type = LogicalType::Float16();
+      length = sizeof(uint16_t);
+      break;
     case ArrowTypeId::STRUCT: {
       auto struct_type = std::static_pointer_cast<::arrow::StructType>(field->type());
       return StructToNode(struct_type, name, field->nullable(), field_id, properties,
diff --git a/cpp/src/parquet/arrow/schema_internal.cc b/cpp/src/parquet/arrow/schema_internal.cc
index da0427cb31000..bb75cce084097 100644
--- a/cpp/src/parquet/arrow/schema_internal.cc
+++ b/cpp/src/parquet/arrow/schema_internal.cc
@@ -130,6 +130,8 @@ Result<std::shared_ptr<ArrowType>> FromFLBA(const LogicalType& logical_type,
   switch (logical_type.type()) {
     case LogicalType::Type::DECIMAL:
       return MakeArrowDecimal(logical_type);
+    case LogicalType::Type::FLOAT16:
+      return ::arrow::float16();
     case LogicalType::Type::NONE:
     case LogicalType::Type::INTERVAL:
     case LogicalType::Type::UUID:
diff --git a/cpp/src/parquet/arrow/test_util.h b/cpp/src/parquet/arrow/test_util.h
index 16c03130c9672..b2be1b3c5354d 100644
--- a/cpp/src/parquet/arrow/test_util.h
+++ b/cpp/src/parquet/arrow/test_util.h
@@ -33,7 +33,9 @@
 #include "arrow/type_fwd.h"
 #include "arrow/type_traits.h"
 #include "arrow/util/decimal.h"
+#include "arrow/util/float16.h"
 #include "parquet/column_reader.h"
+#include "parquet/test_util.h"
 
 namespace parquet {
 
@@ -70,7 +72,14 @@ ::arrow::enable_if_floating_point<ArrowType, Status> NonNullArray(
     size_t size, std::shared_ptr<Array>* out) {
   using c_type = typename ArrowType::c_type;
   std::vector<c_type> values;
-  ::arrow::random_real(size, 0, static_cast<c_type>(0), static_cast<c_type>(1), &values);
+  if constexpr (::arrow::is_half_float_type<ArrowType>::value) {
+    values.resize(size);
+    test::random_float16_numbers(static_cast<int>(size), 0, ::arrow::util::Float16(0.0f),
+                                 ::arrow::util::Float16(1.0f), values.data());
+  } else {
+    ::arrow::random_real(size, 0, static_cast<c_type>(0), static_cast<c_type>(1),
+                         &values);
+  }
   ::arrow::NumericBuilder<ArrowType> builder;
   RETURN_NOT_OK(builder.AppendValues(values.data(), values.size()));
   return builder.Finish(out);
@@ -201,8 +210,17 @@ ::arrow::enable_if_floating_point<ArrowType, Status> NullableArray(
     size_t size, size_t num_nulls, uint32_t seed, std::shared_ptr<Array>* out) {
   using c_type = typename ArrowType::c_type;
   std::vector<c_type> values;
-  ::arrow::random_real(size, seed, static_cast<c_type>(-1e10), static_cast<c_type>(1e10),
-                       &values);
+  if constexpr (::arrow::is_half_float_type<ArrowType>::value) {
+    // Forward the caller-supplied seed (rather than a constant) so that this branch
+    // varies with `seed` in the same way as the random_real branch below.
+    values.resize(size);
+    test::random_float16_numbers(static_cast<int>(size), seed,
+                                 ::arrow::util::Float16(-1e4f),
+                                 ::arrow::util::Float16(1e4f), values.data());
+  } else {
+    ::arrow::random_real(size, seed, static_cast<c_type>(-1e10),
+                         static_cast<c_type>(1e10), &values);
+  }
   std::vector<uint8_t> valid_bytes(size, 1);
 
   for (size_t i = 0; i < num_nulls; i++) {
diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc
index 5dff533c1cce2..a7e7b2f93e174 100644
--- a/cpp/src/parquet/column_writer.cc
+++ b/cpp/src/parquet/column_writer.cc
@@ -39,6 +39,7 @@
 #include "arrow/util/compression.h"
 #include "arrow/util/crc32.h"
 #include "arrow/util/endian.h"
+#include "arrow/util/float16.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/rle_encoding.h"
 #include "arrow/util/type_traits.h"
@@ -65,6 +66,7 @@ using arrow::Status;
 using arrow::bit_util::BitWriter;
 using arrow::internal::checked_cast;
 using arrow::internal::checked_pointer_cast;
+using arrow::util::Float16;
 using arrow::util::RleEncoder;
 
 namespace bit_util = arrow::bit_util;
@@ -2295,6 +2297,33 @@ struct SerializeFunctor<
   int64_t* scratch;
 };
 
+// ----------------------------------------------------------------------
+// Write Arrow to Float16
+
+// Requires a custom serializer because Float16s in Parquet are stored as a 2-byte
+// (little-endian) FLBA, whereas in Arrow they're a native `uint16_t`.
+template <>
+struct SerializeFunctor<::parquet::FLBAType, ::arrow::HalfFloatType> {
+  Status Serialize(const ::arrow::HalfFloatArray& array, ArrowWriteContext*, FLBA* out) {
+    const uint16_t* values = array.raw_values();
+    if (array.null_count() == 0) {
+      for (int64_t i = 0; i < array.length(); ++i) {
+        out[i] = ToFLBA(&values[i]);
+      }
+    } else {
+      for (int64_t i = 0; i < array.length(); ++i) {
+        out[i] = array.IsValid(i) ? ToFLBA(&values[i]) : FLBA{};
+      }
+    }
+    return Status::OK();
+  }
+
+ private:
+  FLBA ToFLBA(const uint16_t* value_ptr) const {
+    return FLBA{reinterpret_cast<const uint8_t*>(value_ptr)};
+  }
+};
+
 template <>
 Status TypedColumnWriterImpl<FLBAType>::WriteArrowDense(
     const int16_t* def_levels, const int16_t* rep_levels, int64_t num_levels,
@@ -2303,6 +2332,7 @@ Status TypedColumnWriterImpl<FLBAType>::WriteArrowDense(
     WRITE_SERIALIZE_CASE(FIXED_SIZE_BINARY, FixedSizeBinaryType, FLBAType)
     WRITE_SERIALIZE_CASE(DECIMAL128, Decimal128Type, FLBAType)
     WRITE_SERIALIZE_CASE(DECIMAL256, Decimal256Type, FLBAType)
+    WRITE_SERIALIZE_CASE(HALF_FLOAT, HalfFloatType, FLBAType)
     default:
       break;
   }
diff --git a/cpp/src/parquet/page_index_test.cc b/cpp/src/parquet/page_index_test.cc
index 5bfe38522af7b..4db49b4267415 100644
--- a/cpp/src/parquet/page_index_test.cc
+++ b/cpp/src/parquet/page_index_test.cc
@@ -21,6 +21,7 @@
 #include <memory>
 
 #include "arrow/io/file.h"
+#include "arrow/util/float16.h"
 #include "parquet/file_reader.h"
 #include "parquet/metadata.h"
 #include "parquet/schema.h"
@@ -579,6 +580,27 @@ TEST(PageIndex, WriteFLBAColumnIndex) {
                             /*has_null_counts=*/false);
 }
 
+TEST(PageIndex, WriteFloat16ColumnIndex) {
+  using ::arrow::util::Float16;
+  auto encode = [](auto value) {
+    auto bytes = Float16(value).ToLittleEndian();
+    return std::string(reinterpret_cast<const char*>(bytes.data()), bytes.size());
+  };
+
+  // Float16 (FLBA) values in ascending order, without null counts.
+  std::vector<EncodedStatistics> page_stats(4);
+  page_stats.at(0).set_min(encode(-1.3)).set_max(encode(+3.6));
+  page_stats.at(1).set_min(encode(-0.2)).set_max(encode(+4.5));
+  page_stats.at(2).set_min(encode(+1.1)).set_max(encode(+5.4));
+  page_stats.at(3).set_min(encode(+2.0)).set_max(encode(+6.3));
+
+  auto node = schema::PrimitiveNode::Make(
+      "c1", Repetition::OPTIONAL, LogicalType::Float16(), Type::FIXED_LEN_BYTE_ARRAY,
+      /*length=*/2);
+  TestWriteTypedColumnIndex(std::move(node), page_stats, BoundaryOrder::Ascending,
+                            /*has_null_counts=*/false);
+}
+
 TEST(PageIndex, WriteColumnIndexWithAllNullPages) {
   // All values are null.
   std::vector<EncodedStatistics> page_stats(3);
diff --git a/cpp/src/parquet/parquet.thrift b/cpp/src/parquet/parquet.thrift
index 88e44c96cc24c..d802166be66e8 100644
--- a/cpp/src/parquet/parquet.thrift
+++ b/cpp/src/parquet/parquet.thrift
@@ -234,6 +234,7 @@ struct MapType {}     // see LogicalTypes.md
 struct ListType {}    // see LogicalTypes.md
 struct EnumType {}    // allowed for BINARY, must be encoded with UTF-8
 struct DateType {}    // allowed for INT32
+struct Float16Type{}  // allowed for FIXED[2], must encode raw FLOAT16 bytes
 
 /**
  * Logical type to annotate a column that is always null.
@@ -344,6 +345,7 @@ union LogicalType {
   12: JsonType JSON           // use ConvertedType JSON
   13: BsonType BSON           // use ConvertedType BSON
   14: UUIDType UUID           // no compatible ConvertedType
+  15: Float16Type FLOAT16     // no compatible ConvertedType
 }
 
 /**
diff --git a/cpp/src/parquet/schema_test.cc b/cpp/src/parquet/schema_test.cc
index 603d9ed8e2124..a1b5557497d9c 100644
--- a/cpp/src/parquet/schema_test.cc
+++ b/cpp/src/parquet/schema_test.cc
@@ -1147,6 +1147,9 @@ TEST(TestLogicalTypeConstruction, NewTypeIncompatibility) {
   auto check_is_UUID = [](const std::shared_ptr<const LogicalType>& logical_type) {
     return logical_type->is_UUID();
   };
+  auto check_is_float16 = [](const std::shared_ptr<const LogicalType>& logical_type) {
+    return logical_type->is_float16();
+  };
   auto check_is_null = [](const std::shared_ptr<const LogicalType>& logical_type) {
     return logical_type->is_null();
   };
@@ -1159,6 +1162,7 @@ TEST(TestLogicalTypeConstruction, NewTypeIncompatibility) {
 
   std::vector<ConfirmNewTypeIncompatibilityArguments> cases = {
       {LogicalType::UUID(), check_is_UUID},
+      {LogicalType::Float16(), check_is_float16},
       {LogicalType::Null(), check_is_null},
       {LogicalType::Time(false, LogicalType::TimeUnit::MILLIS), check_is_time},
       {LogicalType::Time(false, LogicalType::TimeUnit::MICROS), check_is_time},
@@ -1242,6 +1246,7 @@ TEST(TestLogicalTypeOperation, LogicalTypeProperties) {
       {JSONLogicalType::Make(), false, true, true},
       {BSONLogicalType::Make(), false, true, true},
       {UUIDLogicalType::Make(), false, true, true},
+      {Float16LogicalType::Make(), false, true, true},
       {NoLogicalType::Make(), false, false, true},
   };
 
@@ -1351,7 +1356,8 @@ TEST(TestLogicalTypeOperation, LogicalTypeApplicability) {
     int physical_length;
   };
 
-  std::vector<InapplicableType> inapplicable_types = {{Type::FIXED_LEN_BYTE_ARRAY, 8},
+  std::vector<InapplicableType> inapplicable_types = {{Type::FIXED_LEN_BYTE_ARRAY, 1},
+                                                      {Type::FIXED_LEN_BYTE_ARRAY, 8},
                                                       {Type::FIXED_LEN_BYTE_ARRAY, 20},
                                                       {Type::BOOLEAN, -1},
                                                       {Type::INT32, -1},
@@ -1374,6 +1380,12 @@ TEST(TestLogicalTypeOperation, LogicalTypeApplicability) {
   for (const InapplicableType& t : inapplicable_types) {
     ASSERT_FALSE(logical_type->is_applicable(t.physical_type, t.physical_length));
   }
+
+  logical_type = LogicalType::Float16();
+  ASSERT_TRUE(logical_type->is_applicable(Type::FIXED_LEN_BYTE_ARRAY, 2));
+  for (const InapplicableType& t : inapplicable_types) {
+    ASSERT_FALSE(logical_type->is_applicable(t.physical_type, t.physical_length));
+  }
 }
 
 TEST(TestLogicalTypeOperation, DecimalLogicalTypeApplicability) {
@@ -1531,6 +1543,7 @@ TEST(TestLogicalTypeOperation, LogicalTypeRepresentation) {
       {LogicalType::JSON(), "JSON", R"({"Type": "JSON"})"},
       {LogicalType::BSON(), "BSON", R"({"Type": "BSON"})"},
       {LogicalType::UUID(), "UUID", R"({"Type": "UUID"})"},
+      {LogicalType::Float16(), "Float16", R"({"Type": "Float16"})"},
       {LogicalType::None(), "None", R"({"Type": "None"})"},
   };
 
@@ -1580,6 +1593,7 @@ TEST(TestLogicalTypeOperation, LogicalTypeSortOrder) {
       {LogicalType::JSON(), SortOrder::UNSIGNED},
       {LogicalType::BSON(), SortOrder::UNSIGNED},
       {LogicalType::UUID(), SortOrder::UNSIGNED},
+      {LogicalType::Float16(), SortOrder::SIGNED},
       {LogicalType::None(), SortOrder::UNKNOWN}};
 
   for (const ExpectedSortOrder& c : cases) {
@@ -1712,6 +1726,15 @@ TEST(TestSchemaNodeCreation, FactoryExceptions) {
   ASSERT_ANY_THROW(PrimitiveNode::Make("uuid", Repetition::REQUIRED,
                                        UUIDLogicalType::Make(),
                                        Type::FIXED_LEN_BYTE_ARRAY, 64));
+
+  // Incompatible primitive type ...
+  ASSERT_ANY_THROW(PrimitiveNode::Make("float16", Repetition::REQUIRED,
+                                       Float16LogicalType::Make(), Type::BYTE_ARRAY, 2));
+  // Incompatible primitive length ...
+  ASSERT_ANY_THROW(PrimitiveNode::Make("float16", Repetition::REQUIRED,
+                                       Float16LogicalType::Make(),
+                                       Type::FIXED_LEN_BYTE_ARRAY, 3));
+
   // Non-positive length argument for fixed length binary ...
   ASSERT_ANY_THROW(PrimitiveNode::Make("negative_length", Repetition::REQUIRED,
                                        NoLogicalType::Make(), Type::FIXED_LEN_BYTE_ARRAY,
@@ -1902,6 +1925,9 @@ TEST_F(TestSchemaElementConstruction, SimpleCases) {
        [this]() { return element_->logicalType.__isset.BSON; }},
       {"uuid", LogicalType::UUID(), Type::FIXED_LEN_BYTE_ARRAY, 16, false,
        ConvertedType::NA, true, [this]() { return element_->logicalType.__isset.UUID; }},
+      {"float16", LogicalType::Float16(), Type::FIXED_LEN_BYTE_ARRAY, 2, false,
+       ConvertedType::NA, true,
+       [this]() { return element_->logicalType.__isset.FLOAT16; }},
       {"none", LogicalType::None(), Type::INT64, -1, false, ConvertedType::NA, false,
        check_nothing}};
 
@@ -2238,6 +2264,7 @@ TEST(TestLogicalTypeSerialization, Roundtrips) {
       {LogicalType::JSON(), Type::BYTE_ARRAY, -1},
       {LogicalType::BSON(), Type::BYTE_ARRAY, -1},
       {LogicalType::UUID(), Type::FIXED_LEN_BYTE_ARRAY, 16},
+      {LogicalType::Float16(), Type::FIXED_LEN_BYTE_ARRAY, 2},
       {LogicalType::None(), Type::BOOLEAN, -1}};
 
   for (const AnnotatedPrimitiveNodeFactoryArguments& c : cases) {
diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc
index ccfb69c487d40..37b245e0dd6c2 100644
--- a/cpp/src/parquet/statistics.cc
+++ b/cpp/src/parquet/statistics.cc
@@ -30,6 +30,7 @@
 #include "arrow/type_traits.h"
 #include "arrow/util/bit_run_reader.h"
 #include "arrow/util/checked_cast.h"
+#include "arrow/util/float16.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/ubsan.h"
 #include "arrow/visit_data_inline.h"
@@ -41,6 +42,7 @@
 using arrow::default_memory_pool;
 using arrow::MemoryPool;
 using arrow::internal::checked_cast;
+using arrow::util::Float16;
 using arrow::util::SafeCopy;
 using arrow::util::SafeLoad;
 
@@ -53,6 +55,23 @@ namespace {
 constexpr int value_length(int value_length, const ByteArray& value) { return value.len; }
 constexpr int value_length(int type_length, const FLBA& value) { return type_length; }
 
+// Static "constants" for normalizing float16 min/max values. These need to be expressed
+// as pointers because `Float16LogicalType` represents an FLBA.
+struct Float16Constants {
+  static constexpr const uint8_t* lowest() { return lowest_.data(); }
+  static constexpr const uint8_t* max() { return max_.data(); }
+  static constexpr const uint8_t* positive_zero() { return positive_zero_.data(); }
+  static constexpr const uint8_t* negative_zero() { return negative_zero_.data(); }
+
+ private:
+  using Bytes = std::array<uint8_t, 2>;
+  static constexpr Bytes lowest_ =
+      std::numeric_limits<Float16>::lowest().ToLittleEndian();
+  static constexpr Bytes max_ = std::numeric_limits<Float16>::max().ToLittleEndian();
+  static constexpr Bytes positive_zero_ = (+Float16::FromBits(0)).ToLittleEndian();
+  static constexpr Bytes negative_zero_ = (-Float16::FromBits(0)).ToLittleEndian();
+};
+
 template <typename DType, bool is_signed>
 struct CompareHelper {
   using T = typename DType::c_type;
@@ -277,11 +296,43 @@ template <bool is_signed>
 struct CompareHelper<FLBAType, is_signed>
     : public BinaryLikeCompareHelperBase<FLBAType, is_signed> {};
 
+template <>
+struct CompareHelper<Float16LogicalType, /*is_signed=*/true> {
+  using T = FLBA;
+
+  static T DefaultMin() { return T{Float16Constants::max()}; }
+  static T DefaultMax() { return T{Float16Constants::lowest()}; }
+
+  static T Coalesce(T val, T fallback) {
+    return (val.ptr == nullptr || Float16::FromLittleEndian(val.ptr).is_nan()) ? fallback
+                                                                               : val;
+  }
+
+  static inline bool Compare(int type_length, const T& a, const T& b) {
+    const auto lhs = Float16::FromLittleEndian(a.ptr);
+    const auto rhs = Float16::FromLittleEndian(b.ptr);
+    // NaN is handled here (same behavior as native float compare)
+    return lhs < rhs;
+  }
+
+  static T Min(int type_length, const T& a, const T& b) {
+    if (a.ptr == nullptr) return b;
+    if (b.ptr == nullptr) return a;
+    return Compare(type_length, a, b) ? a : b;
+  }
+
+  static T Max(int type_length, const T& a, const T& b) {
+    if (a.ptr == nullptr) return b;
+    if (b.ptr == nullptr) return a;
+    return Compare(type_length, a, b) ? b : a;
+  }
+};
+
 using ::std::optional;
 
 template <typename T>
 ::arrow::enable_if_t<std::is_integral<T>::value, optional<std::pair<T, T>>>
-CleanStatistic(std::pair<T, T> min_max) {
+CleanStatistic(std::pair<T, T> min_max, LogicalType::Type::type) {
   return min_max;
 }
 
@@ -292,7 +343,7 @@ CleanStatistic(std::pair<T, T> min_max) {
 // - If max is -0.0f, replace with 0.0f
 template <typename T>
 ::arrow::enable_if_t<std::is_floating_point<T>::value, optional<std::pair<T, T>>>
-CleanStatistic(std::pair<T, T> min_max) {
+CleanStatistic(std::pair<T, T> min_max, LogicalType::Type::type) {
   T min = min_max.first;
   T max = min_max.second;
 
@@ -318,25 +369,67 @@ CleanStatistic(std::pair<T, T> min_max) {
   return {{min, max}};
 }
 
-optional<std::pair<FLBA, FLBA>> CleanStatistic(std::pair<FLBA, FLBA> min_max) {
+optional<std::pair<FLBA, FLBA>> CleanFloat16Statistic(std::pair<FLBA, FLBA> min_max) {
+  FLBA min_flba = min_max.first;
+  FLBA max_flba = min_max.second;
+  Float16 min = Float16::FromLittleEndian(min_flba.ptr);
+  Float16 max = Float16::FromLittleEndian(max_flba.ptr);
+
+  if (min.is_nan() || max.is_nan()) {
+    return ::std::nullopt;
+  }
+
+  if (min == std::numeric_limits<Float16>::max() &&
+      max == std::numeric_limits<Float16>::lowest()) {
+    return ::std::nullopt;
+  }
+
+  if (min.is_zero() && !min.signbit()) {
+    min_flba = FLBA{Float16Constants::negative_zero()};
+  }
+  if (max.is_zero() && max.signbit()) {
+    max_flba = FLBA{Float16Constants::positive_zero()};
+  }
+
+  return {{min_flba, max_flba}};
+}
+
+optional<std::pair<FLBA, FLBA>> CleanStatistic(std::pair<FLBA, FLBA> min_max,
+                                               LogicalType::Type::type logical_type) {
   if (min_max.first.ptr == nullptr || min_max.second.ptr == nullptr) {
     return ::std::nullopt;
   }
+  if (logical_type == LogicalType::Type::FLOAT16) {
+    return CleanFloat16Statistic(std::move(min_max));
+  }
   return min_max;
 }
 
 optional<std::pair<ByteArray, ByteArray>> CleanStatistic(
-    std::pair<ByteArray, ByteArray> min_max) {
+    std::pair<ByteArray, ByteArray> min_max, LogicalType::Type::type) {
   if (min_max.first.ptr == nullptr || min_max.second.ptr == nullptr) {
     return ::std::nullopt;
   }
   return min_max;
 }
 
+template <typename T>
+struct RebindLogical {
+  using DType = T;
+  using c_type = typename DType::c_type;
+};
+
+template <>
+struct RebindLogical<Float16LogicalType> {
+  using DType = FLBAType;
+  using c_type = DType::c_type;
+};
+
 template <bool is_signed, typename DType>
-class TypedComparatorImpl : virtual public TypedComparator<DType> {
+class TypedComparatorImpl
+    : virtual public TypedComparator<typename RebindLogical<DType>::DType> {
  public:
-  using T = typename DType::c_type;
+  using T = typename RebindLogical<DType>::c_type;
   using Helper = CompareHelper<DType, is_signed>;
 
   explicit TypedComparatorImpl(int type_length = -1) : type_length_(type_length) {}
@@ -384,7 +477,9 @@ class TypedComparatorImpl : virtual public TypedComparator<DType> {
     return {min, max};
   }
 
-  std::pair<T, T> GetMinMax(const ::arrow::Array& values) override;
+  std::pair<T, T> GetMinMax(const ::arrow::Array& values) override {
+    ParquetException::NYI(values.type()->ToString());
+  }
 
  private:
   int type_length_;
@@ -412,12 +507,6 @@ TypedComparatorImpl</*is_signed=*/false, Int32Type>::GetMinMax(const int32_t* va
   return {SafeCopy<int32_t>(min), SafeCopy<int32_t>(max)};
 }
 
-template <bool is_signed, typename DType>
-std::pair<typename DType::c_type, typename DType::c_type>
-TypedComparatorImpl<is_signed, DType>::GetMinMax(const ::arrow::Array& values) {
-  ParquetException::NYI(values.type()->ToString());
-}
-
 template <bool is_signed>
 std::pair<ByteArray, ByteArray> GetMinMaxBinaryHelper(
     const TypedComparatorImpl<is_signed, ByteArrayType>& comparator,
@@ -458,6 +547,16 @@ std::pair<ByteArray, ByteArray> TypedComparatorImpl<false, ByteArrayType>::GetMi
   return GetMinMaxBinaryHelper<false>(*this, values);
 }
 
+LogicalType::Type::type LogicalTypeId(const ColumnDescriptor* descr) {
+  if (const auto& logical_type = descr->logical_type()) {
+    return logical_type->type();
+  }
+  return LogicalType::Type::NONE;
+}
+LogicalType::Type::type LogicalTypeId(const Statistics& stats) {
+  return LogicalTypeId(stats.descr());
+}
+
 template <typename DType>
 class TypedStatisticsImpl : public TypedStatistics<DType> {
  public:
@@ -468,9 +567,9 @@ class TypedStatisticsImpl : public TypedStatistics<DType> {
       : descr_(descr),
         pool_(pool),
         min_buffer_(AllocateBuffer(pool_, 0)),
-        max_buffer_(AllocateBuffer(pool_, 0)) {
-    auto comp = Comparator::Make(descr);
-    comparator_ = std::static_pointer_cast<TypedComparator<DType>>(comp);
+        max_buffer_(AllocateBuffer(pool_, 0)),
+        logical_type_(LogicalTypeId(descr_)) {
+    comparator_ = MakeComparator<DType>(descr);
     TypedStatisticsImpl::Reset();
   }
 
@@ -527,9 +626,27 @@ class TypedStatisticsImpl : public TypedStatistics<DType> {
 
   void IncrementNumValues(int64_t n) override { num_values_ += n; }
 
+  static bool IsMeaningfulLogicalType(LogicalType::Type::type type) {
+    switch (type) {
+      case LogicalType::Type::FLOAT16:
+        return true;
+      default:
+        return false;
+    }
+  }
+
   bool Equals(const Statistics& raw_other) const override {
     if (physical_type() != raw_other.physical_type()) return false;
 
+    const auto other_logical_type = LogicalTypeId(raw_other);
+    // Only compare against logical types that influence the interpretation of the
+    // physical type
+    if (IsMeaningfulLogicalType(logical_type_)) {
+      if (logical_type_ != other_logical_type) return false;
+    } else if (IsMeaningfulLogicalType(other_logical_type)) {
+      return false;
+    }
+
     const auto& other = checked_cast<const TypedStatisticsImpl&>(raw_other);
 
     if (has_min_max_ != other.has_min_max_) return false;
@@ -655,6 +772,7 @@ class TypedStatisticsImpl : public TypedStatistics<DType> {
   EncodedStatistics statistics_;
   std::shared_ptr<TypedComparator<DType>> comparator_;
   std::shared_ptr<ResizableBuffer> min_buffer_, max_buffer_;
+  LogicalType::Type::type logical_type_ = LogicalType::Type::NONE;
 
   void PlainEncode(const T& src, std::string* dst) const;
   void PlainDecode(const std::string& src, T* dst) const;
@@ -686,7 +804,7 @@ class TypedStatisticsImpl : public TypedStatistics<DType> {
 
   void SetMinMaxPair(std::pair<T, T> min_max) {
     // CleanStatistic can return a nullopt in case of erroneous values, e.g. NaN
-    auto maybe_min_max = CleanStatistic(min_max);
+    auto maybe_min_max = CleanStatistic(min_max, logical_type_);
     if (!maybe_min_max) return;
 
     auto min = maybe_min_max.value().first;
@@ -795,12 +913,8 @@ void TypedStatisticsImpl<ByteArrayType>::PlainDecode(const std::string& src,
   dst->ptr = reinterpret_cast<const uint8_t*>(src.c_str());
 }
 
-}  // namespace
-
-// ----------------------------------------------------------------------
-// Public factory functions
-
-std::shared_ptr<Comparator> Comparator::Make(Type::type physical_type,
+std::shared_ptr<Comparator> DoMakeComparator(Type::type physical_type,
+                                             LogicalType::Type::type logical_type,
                                              SortOrder::type sort_order,
                                              int type_length) {
   if (SortOrder::SIGNED == sort_order) {
@@ -820,6 +934,10 @@ std::shared_ptr<Comparator> Comparator::Make(Type::type physical_type,
       case Type::BYTE_ARRAY:
         return std::make_shared<TypedComparatorImpl<true, ByteArrayType>>();
       case Type::FIXED_LEN_BYTE_ARRAY:
+        if (logical_type == LogicalType::Type::FLOAT16) {
+          return std::make_shared<TypedComparatorImpl<true, Float16LogicalType>>(
+              type_length);
+        }
         return std::make_shared<TypedComparatorImpl<true, FLBAType>>(type_length);
       default:
         ParquetException::NYI("Signed Compare not implemented");
@@ -845,8 +963,21 @@ std::shared_ptr<Comparator> Comparator::Make(Type::type physical_type,
   return nullptr;
 }
 
+}  // namespace
+
+// ----------------------------------------------------------------------
+// Public factory functions
+
+std::shared_ptr<Comparator> Comparator::Make(Type::type physical_type,
+                                             SortOrder::type sort_order,
+                                             int type_length) {
+  return DoMakeComparator(physical_type, LogicalType::Type::NONE, sort_order,
+                          type_length);
+}
+
 std::shared_ptr<Comparator> Comparator::Make(const ColumnDescriptor* descr) {
-  return Make(descr->physical_type(), descr->sort_order(), descr->type_length());
+  return DoMakeComparator(descr->physical_type(), LogicalTypeId(descr),
+                          descr->sort_order(), descr->type_length());
 }
 
 std::shared_ptr<Statistics> Statistics::Make(const ColumnDescriptor* descr,
diff --git a/cpp/src/parquet/statistics_test.cc b/cpp/src/parquet/statistics_test.cc
index 637832945ec57..cb2e6455abfa9 100644
--- a/cpp/src/parquet/statistics_test.cc
+++ b/cpp/src/parquet/statistics_test.cc
@@ -34,6 +34,7 @@
 #include "arrow/type_traits.h"
 #include "arrow/util/bit_util.h"
 #include "arrow/util/bitmap_ops.h"
+#include "arrow/util/float16.h"
 #include "arrow/util/ubsan.h"
 
 #include "parquet/column_reader.h"
@@ -49,6 +50,7 @@
 
 using arrow::default_memory_pool;
 using arrow::MemoryPool;
+using arrow::util::Float16;
 using arrow::util::SafeCopy;
 
 namespace bit_util = arrow::bit_util;
@@ -875,9 +877,22 @@ TEST(CorrectStatistics, Basics) {
 // Test SortOrder class
 static const int NUM_VALUES = 10;
 
-template <typename TestType>
+template <typename T>
+struct RebindLogical {
+  using ParquetType = T;
+  using CType = typename T::c_type;
+};
+
+template <>
+struct RebindLogical<Float16LogicalType> {
+  using ParquetType = FLBAType;
+  using CType = ParquetType::c_type;
+};
+
+template <typename T>
 class TestStatisticsSortOrder : public ::testing::Test {
  public:
+  using TestType = typename RebindLogical<T>::ParquetType;
   using c_type = typename TestType::c_type;
 
   void SetUp() override {
@@ -955,7 +970,7 @@ class TestStatisticsSortOrder : public ::testing::Test {
 };
 
 using CompareTestTypes = ::testing::Types<Int32Type, Int64Type, FloatType, DoubleType,
-                                          ByteArrayType, FLBAType>;
+                                          ByteArrayType, FLBAType, Float16LogicalType>;
 
 // TYPE::INT32
 template <>
@@ -1102,6 +1117,39 @@ void TestStatisticsSortOrder<FLBAType>::SetValues() {
       .set_max(std::string(reinterpret_cast<const char*>(&vals[8][0]), FLBA_LENGTH));
 }
 
+template <>
+void TestStatisticsSortOrder<Float16LogicalType>::AddNodes(std::string name) {
+  auto node =
+      schema::PrimitiveNode::Make(name, Repetition::REQUIRED, LogicalType::Float16(),
+                                  Type::FIXED_LEN_BYTE_ARRAY, sizeof(uint16_t));
+  fields_.push_back(std::move(node));
+}
+
+template <>
+void TestStatisticsSortOrder<Float16LogicalType>::SetValues() {
+  constexpr int kValueLen = 2;
+  constexpr int kNumBytes = NUM_VALUES * kValueLen;
+
+  const Float16 f16_vals[NUM_VALUES] = {
+      Float16::FromFloat(+2.0f), Float16::FromFloat(-4.0f), Float16::FromFloat(+4.0f),
+      Float16::FromFloat(-2.0f), Float16::FromFloat(-1.0f), Float16::FromFloat(+3.0f),
+      Float16::FromFloat(+1.0f), Float16::FromFloat(-5.0f), Float16::FromFloat(+0.0f),
+      Float16::FromFloat(-3.0f),
+  };
+
+  values_buf_.resize(kNumBytes);
+  uint8_t* ptr = values_buf_.data();
+  for (int i = 0; i < NUM_VALUES; ++i) {
+    f16_vals[i].ToLittleEndian(ptr);
+    values_[i].ptr = ptr;
+    ptr += kValueLen;
+  }
+
+  stats_[0]
+      .set_min(std::string(reinterpret_cast<const char*>(values_[7].ptr), kValueLen))
+      .set_max(std::string(reinterpret_cast<const char*>(values_[2].ptr), kValueLen));
+}
+
 TYPED_TEST_SUITE(TestStatisticsSortOrder, CompareTestTypes);
 
 TYPED_TEST(TestStatisticsSortOrder, MinMax) {
@@ -1167,12 +1215,20 @@ TEST_F(TestStatisticsSortOrderFLBA, UnknownSortOrder) {
   ASSERT_FALSE(cc_metadata->is_stats_set());
 }
 
+template <typename T>
+static std::string EncodeValue(const T& val) {
+  return std::string(reinterpret_cast<const char*>(&val), sizeof(val));
+}
+static std::string EncodeValue(const FLBA& val, int length = sizeof(uint16_t)) {
+  return std::string(reinterpret_cast<const char*>(val.ptr), length);
+}
+
 template <typename Stats, typename Array, typename T = typename Array::value_type>
 void AssertMinMaxAre(Stats stats, const Array& values, T expected_min, T expected_max) {
   stats->Update(values.data(), values.size(), 0);
   ASSERT_TRUE(stats->HasMinMax());
-  EXPECT_EQ(stats->min(), expected_min);
-  EXPECT_EQ(stats->max(), expected_max);
+  EXPECT_EQ(stats->EncodeMin(), EncodeValue(expected_min));
+  EXPECT_EQ(stats->EncodeMax(), EncodeValue(expected_max));
 }
 
 template <typename Stats, typename Array, typename T = typename Stats::T>
@@ -1184,8 +1240,8 @@ void AssertMinMaxAre(Stats stats, const Array& values, const uint8_t* valid_bitm
   stats->UpdateSpaced(values.data(), valid_bitmap, 0, non_null_count + null_count,
                       non_null_count, null_count);
   ASSERT_TRUE(stats->HasMinMax());
-  EXPECT_EQ(stats->min(), expected_min);
-  EXPECT_EQ(stats->max(), expected_max);
+  EXPECT_EQ(stats->EncodeMin(), EncodeValue(expected_min));
+  EXPECT_EQ(stats->EncodeMax(), EncodeValue(expected_max));
 }
 
 template <typename Stats, typename Array>
@@ -1268,50 +1324,225 @@ void CheckExtrema() {
 TEST(TestStatistic, Int32Extrema) { CheckExtrema<Int32Type>(); }
 TEST(TestStatistic, Int64Extrema) { CheckExtrema<Int64Type>(); }
 
-// PARQUET-1225: Float NaN values may lead to incorrect min-max
-template <typename ParquetType>
-void CheckNaNs() {
-  using T = typename ParquetType::c_type;
+template <typename T>
+class TestFloatStatistics : public ::testing::Test {
+ public:
+  using ParquetType = typename RebindLogical<T>::ParquetType;
+  using c_type = typename ParquetType::c_type;
+
+  void Init();
+  void SetUp() override {
+    this->Init();
+    ASSERT_NE(EncodeValue(negative_zero_), EncodeValue(positive_zero_));
+  }
+
+  bool signbit(c_type val);
+  void CheckEq(const c_type& l, const c_type& r);
+  NodePtr MakeNode(const std::string& name, Repetition::type rep);
+
+  template <typename Stats, typename Values>
+  void CheckMinMaxZeroesSign(Stats stats, const Values& values) {
+    stats->Update(values.data(), values.size(), /*null_count=*/0);
+    ASSERT_TRUE(stats->HasMinMax());
+
+    this->CheckEq(stats->min(), positive_zero_);
+    ASSERT_TRUE(this->signbit(stats->min()));
+    ASSERT_EQ(stats->EncodeMin(), EncodeValue(negative_zero_));
+
+    this->CheckEq(stats->max(), positive_zero_);
+    ASSERT_FALSE(this->signbit(stats->max()));
+    ASSERT_EQ(stats->EncodeMax(), EncodeValue(positive_zero_));
+  }
+
+  // ARROW-5562: Ensure that -0.0f and 0.0f values are properly handled like in
+  // parquet-mr
+  void TestNegativeZeroes() {
+    NodePtr node = this->MakeNode("f", Repetition::OPTIONAL);
+    ColumnDescriptor descr(node, 1, 1);
+
+    {
+      std::array<c_type, 2> values{negative_zero_, positive_zero_};
+      auto stats = MakeStatistics<ParquetType>(&descr);
+      CheckMinMaxZeroesSign(stats, values);
+    }
+
+    {
+      std::array<c_type, 2> values{positive_zero_, negative_zero_};
+      auto stats = MakeStatistics<ParquetType>(&descr);
+      CheckMinMaxZeroesSign(stats, values);
+    }
+
+    {
+      std::array<c_type, 2> values{negative_zero_, negative_zero_};
+      auto stats = MakeStatistics<ParquetType>(&descr);
+      CheckMinMaxZeroesSign(stats, values);
+    }
+
+    {
+      std::array<c_type, 2> values{positive_zero_, positive_zero_};
+      auto stats = MakeStatistics<ParquetType>(&descr);
+      CheckMinMaxZeroesSign(stats, values);
+    }
+  }
+
+  // PARQUET-1225: Float NaN values may lead to incorrect min-max
+  template <typename Values>
+  void CheckNaNs(ColumnDescriptor* descr, const Values& all_nans, const Values& some_nans,
+                 const Values& other_nans, c_type min, c_type max, uint8_t valid_bitmap,
+                 uint8_t valid_bitmap_no_nans) {
+    auto some_nan_stats = MakeStatistics<ParquetType>(descr);
+    // Ingesting only nans should not yield valid min max
+    AssertUnsetMinMax(some_nan_stats, all_nans);
+    // Ingesting a mix of NaNs and non-NaNs should yield a valid min max.
+    AssertMinMaxAre(some_nan_stats, some_nans, min, max);
+    // Ingesting only nans after a valid min/max, should have no effect
+    AssertMinMaxAre(some_nan_stats, all_nans, min, max);
+
+    some_nan_stats = MakeStatistics<ParquetType>(descr);
+    AssertUnsetMinMax(some_nan_stats, all_nans, &valid_bitmap);
+    // NaNs should not pollute min max when excluded via null bitmap.
+    AssertMinMaxAre(some_nan_stats, some_nans, &valid_bitmap_no_nans, min, max);
+    // Ingesting NaNs with a null bitmap should not change the result.
+    AssertMinMaxAre(some_nan_stats, some_nans, &valid_bitmap, min, max);
+
+    // An array that doesn't start with NaN
+    auto other_stats = MakeStatistics<ParquetType>(descr);
+    AssertMinMaxAre(other_stats, other_nans, min, max);
+  }
+
+  void TestNaNs();
+
+ protected:
+  std::vector<uint8_t> data_buf_;
+  c_type positive_zero_;
+  c_type negative_zero_;
+};
+
+template <typename T>
+void TestFloatStatistics<T>::Init() {
+  positive_zero_ = c_type{};
+  negative_zero_ = -positive_zero_;
+}
+template <>
+void TestFloatStatistics<Float16LogicalType>::Init() {
+  data_buf_.resize(4);
+  (+Float16(0)).ToLittleEndian(&data_buf_[0]);
+  positive_zero_ = FLBA{&data_buf_[0]};
+  (-Float16(0)).ToLittleEndian(&data_buf_[2]);
+  negative_zero_ = FLBA{&data_buf_[2]};
+}
+
+template <typename T>
+NodePtr TestFloatStatistics<T>::MakeNode(const std::string& name, Repetition::type rep) {
+  return PrimitiveNode::Make(name, rep, ParquetType::type_num);
+}
+template <>
+NodePtr TestFloatStatistics<Float16LogicalType>::MakeNode(const std::string& name,
+                                                          Repetition::type rep) {
+  return PrimitiveNode::Make(name, rep, LogicalType::Float16(),
+                             Type::FIXED_LEN_BYTE_ARRAY, 2);
+}
+
+template <typename T>
+void TestFloatStatistics<T>::CheckEq(const c_type& l, const c_type& r) {
+  ASSERT_EQ(l, r);
+}
+template <>
+void TestFloatStatistics<Float16LogicalType>::CheckEq(const c_type& a, const c_type& b) {
+  auto l = Float16::FromLittleEndian(a.ptr);
+  auto r = Float16::FromLittleEndian(b.ptr);
+  ASSERT_EQ(l, r);
+}
 
+template <typename T>
+bool TestFloatStatistics<T>::signbit(c_type val) {
+  return std::signbit(val);
+}
+template <>
+bool TestFloatStatistics<Float16LogicalType>::signbit(c_type val) {
+  return Float16::FromLittleEndian(val.ptr).signbit();
+}
+
+template <typename T>
+void TestFloatStatistics<T>::TestNaNs() {
   constexpr int kNumValues = 8;
-  NodePtr node = PrimitiveNode::Make("f", Repetition::OPTIONAL, ParquetType::type_num);
+  NodePtr node = this->MakeNode("f", Repetition::OPTIONAL);
   ColumnDescriptor descr(node, 1, 1);
 
-  constexpr T nan = std::numeric_limits<T>::quiet_NaN();
-  constexpr T min = -4.0f;
-  constexpr T max = 3.0f;
+  constexpr c_type nan = std::numeric_limits<c_type>::quiet_NaN();
+  constexpr c_type min = -4.0f;
+  constexpr c_type max = 3.0f;
+
+  std::array<c_type, kNumValues> all_nans{nan, nan, nan, nan, nan, nan, nan, nan};
+  std::array<c_type, kNumValues> some_nans{nan, max, -3.0f, -1.0f, nan, 2.0f, min, nan};
+  std::array<c_type, kNumValues> other_nans{1.5f, max, -3.0f, -1.0f, nan, 2.0f, min, nan};
 
-  std::array<T, kNumValues> all_nans{nan, nan, nan, nan, nan, nan, nan, nan};
-  std::array<T, kNumValues> some_nans{nan, max, -3.0f, -1.0f, nan, 2.0f, min, nan};
   uint8_t valid_bitmap = 0x7F;  // 0b01111111
   // NaNs excluded
   uint8_t valid_bitmap_no_nans = 0x6E;  // 0b01101110
 
-  // Test values
-  auto some_nan_stats = MakeStatistics<ParquetType>(&descr);
-  // Ingesting only nans should not yield valid min max
-  AssertUnsetMinMax(some_nan_stats, all_nans);
-  // Ingesting a mix of NaNs and non-NaNs should not yield valid min max.
-  AssertMinMaxAre(some_nan_stats, some_nans, min, max);
-  // Ingesting only nans after a valid min/max, should have not effect
-  AssertMinMaxAre(some_nan_stats, all_nans, min, max);
+  this->CheckNaNs(&descr, all_nans, some_nans, other_nans, min, max, valid_bitmap,
+                  valid_bitmap_no_nans);
+}
 
-  some_nan_stats = MakeStatistics<ParquetType>(&descr);
-  AssertUnsetMinMax(some_nan_stats, all_nans, &valid_bitmap);
-  // NaNs should not pollute min max when excluded via null bitmap.
-  AssertMinMaxAre(some_nan_stats, some_nans, &valid_bitmap_no_nans, min, max);
-  // Ingesting NaNs with a null bitmap should not change the result.
-  AssertMinMaxAre(some_nan_stats, some_nans, &valid_bitmap, min, max);
+struct BufferedFloat16 {
+  explicit BufferedFloat16(Float16 f16) : f16(f16) {
+    this->f16.ToLittleEndian(bytes_.data());
+  }
+  explicit BufferedFloat16(float f) : BufferedFloat16(Float16::FromFloat(f)) {}
+  const uint8_t* bytes() const { return bytes_.data(); }
+
+  Float16 f16;
+  std::array<uint8_t, 2> bytes_;
+};
+
+template <>
+void TestFloatStatistics<Float16LogicalType>::TestNaNs() {
+  constexpr int kNumValues = 8;
+
+  NodePtr node = this->MakeNode("f", Repetition::OPTIONAL);
+  ColumnDescriptor descr(node, 1, 1);
+
+  using F16 = BufferedFloat16;
+  const auto nan_f16 = F16(std::numeric_limits<Float16>::quiet_NaN());
+  const auto min_f16 = F16(-4.0f);
+  const auto max_f16 = F16(+3.0f);
+
+  const auto min = FLBA{min_f16.bytes()};
+  const auto max = FLBA{max_f16.bytes()};
+
+  std::array<F16, kNumValues> all_nans_f16 = {nan_f16, nan_f16, nan_f16, nan_f16,
+                                              nan_f16, nan_f16, nan_f16, nan_f16};
+  std::array<F16, kNumValues> some_nans_f16 = {
+      nan_f16, max_f16, F16(-3.0f), F16(-1.0f), nan_f16, F16(+2.0f), min_f16, nan_f16};
+  std::array<F16, kNumValues> other_nans_f16 = some_nans_f16;
+  other_nans_f16[0] = F16(+1.5f);  // +1.5
+
+  auto prepare_values = [](const auto& values) -> std::vector<FLBA> {
+    std::vector<FLBA> out(values.size());
+    std::transform(values.begin(), values.end(), out.begin(),
+                   [](const F16& f16) { return FLBA{f16.bytes()}; });
+    return out;
+  };
+
+  auto all_nans = prepare_values(all_nans_f16);
+  auto some_nans = prepare_values(some_nans_f16);
+  auto other_nans = prepare_values(other_nans_f16);
+
+  uint8_t valid_bitmap = 0x7F;  // 0b01111111
+  // NaNs excluded
+  uint8_t valid_bitmap_no_nans = 0x6E;  // 0b01101110
 
-  // An array that doesn't start with NaN
-  std::array<T, kNumValues> other_nans{1.5f, max, -3.0f, -1.0f, nan, 2.0f, min, nan};
-  auto other_stats = MakeStatistics<ParquetType>(&descr);
-  AssertMinMaxAre(other_stats, other_nans, min, max);
+  this->CheckNaNs(&descr, all_nans, some_nans, other_nans, min, max, valid_bitmap,
+                  valid_bitmap_no_nans);
 }
 
-TEST(TestStatistic, NaNFloatValues) { CheckNaNs<FloatType>(); }
+using FloatingPointTypes = ::testing::Types<FloatType, DoubleType, Float16LogicalType>;
+
+TYPED_TEST_SUITE(TestFloatStatistics, FloatingPointTypes);
 
-TEST(TestStatistic, NaNDoubleValues) { CheckNaNs<DoubleType>(); }
+TYPED_TEST(TestFloatStatistics, NegativeZeros) { this->TestNegativeZeroes(); }
+TYPED_TEST(TestFloatStatistics, NaNs) { this->TestNaNs(); }
 
 // ARROW-7376
 TEST(TestStatisticsSortOrderFloatNaN, NaNAndNullsInfiniteLoop) {
@@ -1327,58 +1558,6 @@ TEST(TestStatisticsSortOrderFloatNaN, NaNAndNullsInfiniteLoop) {
   AssertUnsetMinMax(stats, nans_but_last, &all_but_last_valid);
 }
 
-template <typename Stats, typename Array, typename T = typename Array::value_type>
-void AssertMinMaxZeroesSign(Stats stats, const Array& values) {
-  stats->Update(values.data(), values.size(), 0);
-  ASSERT_TRUE(stats->HasMinMax());
-
-  T zero{};
-  ASSERT_EQ(stats->min(), zero);
-  ASSERT_TRUE(std::signbit(stats->min()));
-
-  ASSERT_EQ(stats->max(), zero);
-  ASSERT_FALSE(std::signbit(stats->max()));
-}
-
-// ARROW-5562: Ensure that -0.0f and 0.0f values are properly handled like in
-// parquet-mr
-template <typename ParquetType>
-void CheckNegativeZeroStats() {
-  using T = typename ParquetType::c_type;
-
-  NodePtr node = PrimitiveNode::Make("f", Repetition::OPTIONAL, ParquetType::type_num);
-  ColumnDescriptor descr(node, 1, 1);
-  T zero{};
-
-  {
-    std::array<T, 2> values{-zero, zero};
-    auto stats = MakeStatistics<ParquetType>(&descr);
-    AssertMinMaxZeroesSign(stats, values);
-  }
-
-  {
-    std::array<T, 2> values{zero, -zero};
-    auto stats = MakeStatistics<ParquetType>(&descr);
-    AssertMinMaxZeroesSign(stats, values);
-  }
-
-  {
-    std::array<T, 2> values{-zero, -zero};
-    auto stats = MakeStatistics<ParquetType>(&descr);
-    AssertMinMaxZeroesSign(stats, values);
-  }
-
-  {
-    std::array<T, 2> values{zero, zero};
-    auto stats = MakeStatistics<ParquetType>(&descr);
-    AssertMinMaxZeroesSign(stats, values);
-  }
-}
-
-TEST(TestStatistics, FloatNegativeZero) { CheckNegativeZeroStats<FloatType>(); }
-
-TEST(TestStatistics, DoubleNegativeZero) { CheckNegativeZeroStats<DoubleType>(); }
-
 // Test statistics for binary column with UNSIGNED sort order
 TEST(TestStatisticsSortOrderMinMax, Unsigned) {
   std::string dir_string(test::get_data_dir());
diff --git a/cpp/src/parquet/test_util.cc b/cpp/src/parquet/test_util.cc
index b65945cc7329f..a6fa8afc0f5b3 100644
--- a/cpp/src/parquet/test_util.cc
+++ b/cpp/src/parquet/test_util.cc
@@ -101,6 +101,16 @@ void random_Int96_numbers(int n, uint32_t seed, int32_t min_value, int32_t max_v
   }
 }
 
+void random_float16_numbers(int n, uint32_t seed, ::arrow::util::Float16 min_value,
+                            ::arrow::util::Float16 max_value, uint16_t* out) {
+  std::vector<float> values(n);
+  random_numbers(n, seed, static_cast<float>(min_value), static_cast<float>(max_value),
+                 values.data());
+  for (int i = 0; i < n; ++i) {
+    out[i] = ::arrow::util::Float16(values[i]).bits();
+  }
+}
+
 void random_fixed_byte_array(int n, uint32_t seed, uint8_t* buf, int len, FLBA* out) {
   std::default_random_engine gen(seed);
   std::uniform_int_distribution<int> d(0, 255);
diff --git a/cpp/src/parquet/test_util.h b/cpp/src/parquet/test_util.h
index c8578609e9b1d..59728cf53f699 100644
--- a/cpp/src/parquet/test_util.h
+++ b/cpp/src/parquet/test_util.h
@@ -33,6 +33,7 @@
 
 #include "arrow/io/memory.h"
 #include "arrow/testing/util.h"
+#include "arrow/util/float16.h"
 
 #include "parquet/column_page.h"
 #include "parquet/column_reader.h"
@@ -148,6 +149,9 @@ inline void random_numbers(int n, uint32_t seed, double min_value, double max_va
 void random_Int96_numbers(int n, uint32_t seed, int32_t min_value, int32_t max_value,
                           Int96* out);
 
+void random_float16_numbers(int n, uint32_t seed, ::arrow::util::Float16 min_value,
+                            ::arrow::util::Float16 max_value, uint16_t* out);
+
 void random_fixed_byte_array(int n, uint32_t seed, uint8_t* buf, int len, FLBA* out);
 
 void random_byte_array(int n, uint32_t seed, uint8_t* buf, ByteArray* out, int min_size,
diff --git a/cpp/src/parquet/types.cc b/cpp/src/parquet/types.cc
index 3127b60e5d1ae..04a0fc2e0117b 100644
--- a/cpp/src/parquet/types.cc
+++ b/cpp/src/parquet/types.cc
@@ -441,6 +441,8 @@ std::shared_ptr<const LogicalType> LogicalType::FromThrift(
     return BSONLogicalType::Make();
   } else if (type.__isset.UUID) {
     return UUIDLogicalType::Make();
+  } else if (type.__isset.FLOAT16) {
+    return Float16LogicalType::Make();
   } else {
     throw ParquetException("Metadata contains Thrift LogicalType that is not recognized");
   }
@@ -494,6 +496,10 @@ std::shared_ptr<const LogicalType> LogicalType::BSON() { return BSONLogicalType:
 
 std::shared_ptr<const LogicalType> LogicalType::UUID() { return UUIDLogicalType::Make(); }
 
+std::shared_ptr<const LogicalType> LogicalType::Float16() {
+  return Float16LogicalType::Make();
+}
+
 std::shared_ptr<const LogicalType> LogicalType::None() { return NoLogicalType::Make(); }
 
 /*
@@ -575,6 +581,7 @@ class LogicalType::Impl {
   class JSON;
   class BSON;
   class UUID;
+  class Float16;
   class No;
   class Undefined;
 
@@ -644,6 +651,9 @@ bool LogicalType::is_null() const { return impl_->type() == LogicalType::Type::N
 bool LogicalType::is_JSON() const { return impl_->type() == LogicalType::Type::JSON; }
 bool LogicalType::is_BSON() const { return impl_->type() == LogicalType::Type::BSON; }
 bool LogicalType::is_UUID() const { return impl_->type() == LogicalType::Type::UUID; }
+bool LogicalType::is_float16() const {
+  return impl_->type() == LogicalType::Type::FLOAT16;
+}
 bool LogicalType::is_none() const { return impl_->type() == LogicalType::Type::NONE; }
 bool LogicalType::is_valid() const {
   return impl_->type() != LogicalType::Type::UNDEFINED;
@@ -1557,6 +1567,22 @@ class LogicalType::Impl::UUID final : public LogicalType::Impl::Incompatible,
 
 GENERATE_MAKE(UUID)
 
+class LogicalType::Impl::Float16 final : public LogicalType::Impl::Incompatible,
+                                         public LogicalType::Impl::TypeLengthApplicable {
+ public:
+  friend class Float16LogicalType;
+
+  OVERRIDE_TOSTRING(Float16)
+  OVERRIDE_TOTHRIFT(Float16Type, FLOAT16)
+
+ private:
+  Float16()
+      : LogicalType::Impl(LogicalType::Type::FLOAT16, SortOrder::SIGNED),
+        LogicalType::Impl::TypeLengthApplicable(parquet::Type::FIXED_LEN_BYTE_ARRAY, 2) {}
+};
+
+GENERATE_MAKE(Float16)
+
 class LogicalType::Impl::No final : public LogicalType::Impl::SimpleCompatible,
                                     public LogicalType::Impl::UniversalApplicable {
  public:
diff --git a/cpp/src/parquet/types.h b/cpp/src/parquet/types.h
index 0315376a883e9..76dd0efc7cb4a 100644
--- a/cpp/src/parquet/types.h
+++ b/cpp/src/parquet/types.h
@@ -157,6 +157,7 @@ class PARQUET_EXPORT LogicalType {
       JSON,
       BSON,
       UUID,
+      FLOAT16,
       NONE  // Not a real logical type; should always be last element
     };
   };
@@ -210,6 +211,7 @@ class PARQUET_EXPORT LogicalType {
   static std::shared_ptr<const LogicalType> JSON();
   static std::shared_ptr<const LogicalType> BSON();
   static std::shared_ptr<const LogicalType> UUID();
+  static std::shared_ptr<const LogicalType> Float16();
 
   /// \brief Create a placeholder for when no logical type is specified
   static std::shared_ptr<const LogicalType> None();
@@ -263,6 +265,7 @@ class PARQUET_EXPORT LogicalType {
   bool is_JSON() const;
   bool is_BSON() const;
   bool is_UUID() const;
+  bool is_float16() const;
   bool is_none() const;
   /// \brief Return true if this logical type is of a known type.
   bool is_valid() const;
@@ -433,6 +436,16 @@ class PARQUET_EXPORT UUIDLogicalType : public LogicalType {
   UUIDLogicalType() = default;
 };
 
+/// \brief Allowed for physical type FIXED_LEN_BYTE_ARRAY with length 2,
+/// must encode raw FLOAT16 bytes.
+class PARQUET_EXPORT Float16LogicalType : public LogicalType {
+ public:
+  static std::shared_ptr<const LogicalType> Make();
+
+ private:
+  Float16LogicalType() = default;
+};
+
 /// \brief Allowed for any physical type.
 class PARQUET_EXPORT NoLogicalType : public LogicalType {
  public:
diff --git a/docs/source/cpp/parquet.rst b/docs/source/cpp/parquet.rst
index 23fca8fd73010..3e06352f5dde3 100644
--- a/docs/source/cpp/parquet.rst
+++ b/docs/source/cpp/parquet.rst
@@ -481,6 +481,8 @@ physical type.
 +-------------------+-----------------------------+----------------------------+---------+
 | MAP               | Any                         | Map                        | \(6)    |
 +-------------------+-----------------------------+----------------------------+---------+
+| FLOAT16           | FIXED_LENGTH_BYTE_ARRAY     | HalfFloat                  |         |
++-------------------+-----------------------------+----------------------------+---------+
 
 * \(1) On the write side, the Parquet physical type INT32 is generated.
 

From d076c69e81e5d331bae214a3cf9fabedb17752fa Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Wed, 15 Nov 2023 16:15:42 +0100
Subject: [PATCH 12/23] GH-38676: [Python] Fix potential deadlock when CSV
 reading errors out (#38713)

### Rationale for this change

A deadlock can happen in a C++ destructor in the following case:
* the C++ destructor is called from Python, holding the GIL
* the C++ destructor waits for a threaded task to finish
* the threaded task has invoked some Python code which is waiting to acquire the GIL

### What changes are included in this PR?

To reliably prevent such a deadlock, introduce `std::shared_ptr` and `std::unique_ptr` wrappers that release the GIL when deallocating the embedded pointer.

### Are these changes tested?

Yes.

### Are there any user-facing changes?

No.
* Closes: #38676

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 python/pyarrow/_csv.pyx                     |  5 +-
 python/pyarrow/_dataset.pxd                 |  8 +--
 python/pyarrow/_dataset.pyx                 |  4 +-
 python/pyarrow/_parquet.pyx                 |  6 +--
 python/pyarrow/includes/libarrow_python.pxd |  8 +++
 python/pyarrow/ipc.pxi                      |  2 +-
 python/pyarrow/lib.pxd                      |  4 +-
 python/pyarrow/src/arrow/python/common.h    | 55 +++++++++++++++++++--
 python/pyarrow/tests/test_csv.py            | 21 ++++++++
 9 files changed, 93 insertions(+), 20 deletions(-)

diff --git a/python/pyarrow/_csv.pyx b/python/pyarrow/_csv.pyx
index e532d8d8ab22a..508488c0c3b3c 100644
--- a/python/pyarrow/_csv.pyx
+++ b/python/pyarrow/_csv.pyx
@@ -26,8 +26,7 @@ from collections.abc import Mapping
 
 from pyarrow.includes.common cimport *
 from pyarrow.includes.libarrow cimport *
-from pyarrow.includes.libarrow_python cimport (MakeInvalidRowHandler,
-                                               PyInvalidRowCallback)
+from pyarrow.includes.libarrow_python cimport *
 from pyarrow.lib cimport (check_status, Field, MemoryPool, Schema,
                           RecordBatchReader, ensure_type,
                           maybe_unbox_memory_pool, get_input_stream,
@@ -1251,7 +1250,7 @@ def read_csv(input_file, read_options=None, parse_options=None,
         CCSVParseOptions c_parse_options
         CCSVConvertOptions c_convert_options
         CIOContext io_context
-        shared_ptr[CCSVReader] reader
+        SharedPtrNoGIL[CCSVReader] reader
         shared_ptr[CTable] table
 
     _get_reader(input_file, read_options, &stream)
diff --git a/python/pyarrow/_dataset.pxd b/python/pyarrow/_dataset.pxd
index 210e5558009ec..bee9fc1f0987a 100644
--- a/python/pyarrow/_dataset.pxd
+++ b/python/pyarrow/_dataset.pxd
@@ -31,7 +31,7 @@ cdef CFileSource _make_file_source(object file, FileSystem filesystem=*)
 cdef class DatasetFactory(_Weakrefable):
 
     cdef:
-        shared_ptr[CDatasetFactory] wrapped
+        SharedPtrNoGIL[CDatasetFactory] wrapped
         CDatasetFactory* factory
 
     cdef init(self, const shared_ptr[CDatasetFactory]& sp)
@@ -45,7 +45,7 @@ cdef class DatasetFactory(_Weakrefable):
 cdef class Dataset(_Weakrefable):
 
     cdef:
-        shared_ptr[CDataset] wrapped
+        SharedPtrNoGIL[CDataset] wrapped
         CDataset* dataset
         public dict _scan_options
 
@@ -59,7 +59,7 @@ cdef class Dataset(_Weakrefable):
 
 cdef class Scanner(_Weakrefable):
     cdef:
-        shared_ptr[CScanner] wrapped
+        SharedPtrNoGIL[CScanner] wrapped
         CScanner* scanner
 
     cdef void init(self, const shared_ptr[CScanner]& sp)
@@ -122,7 +122,7 @@ cdef class FileWriteOptions(_Weakrefable):
 cdef class Fragment(_Weakrefable):
 
     cdef:
-        shared_ptr[CFragment] wrapped
+        SharedPtrNoGIL[CFragment] wrapped
         CFragment* fragment
 
     cdef void init(self, const shared_ptr[CFragment]& sp)
diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
index 48ee676915311..d7d69965d000a 100644
--- a/python/pyarrow/_dataset.pyx
+++ b/python/pyarrow/_dataset.pyx
@@ -3227,7 +3227,7 @@ cdef class RecordBatchIterator(_Weakrefable):
         object iterator_owner
         # Iterator is a non-POD type and Cython uses offsetof, leading
         # to a compiler warning unless wrapped like so
-        shared_ptr[CRecordBatchIterator] iterator
+        SharedPtrNoGIL[CRecordBatchIterator] iterator
 
     def __init__(self):
         _forbid_instantiation(self.__class__, subclasses_instead=False)
@@ -3273,7 +3273,7 @@ cdef class TaggedRecordBatchIterator(_Weakrefable):
     """An iterator over a sequence of record batches with fragments."""
     cdef:
         object iterator_owner
-        shared_ptr[CTaggedRecordBatchIterator] iterator
+        SharedPtrNoGIL[CTaggedRecordBatchIterator] iterator
 
     def __init__(self):
         _forbid_instantiation(self.__class__, subclasses_instead=False)
diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx
index 48091367b2ff8..089ed7c75ce58 100644
--- a/python/pyarrow/_parquet.pyx
+++ b/python/pyarrow/_parquet.pyx
@@ -24,6 +24,7 @@ import warnings
 from cython.operator cimport dereference as deref
 from pyarrow.includes.common cimport *
 from pyarrow.includes.libarrow cimport *
+from pyarrow.includes.libarrow_python cimport *
 from pyarrow.lib cimport (_Weakrefable, Buffer, Schema,
                           check_status,
                           MemoryPool, maybe_unbox_memory_pool,
@@ -1165,7 +1166,7 @@ cdef class ParquetReader(_Weakrefable):
     cdef:
         object source
         CMemoryPool* pool
-        unique_ptr[FileReader] reader
+        UniquePtrNoGIL[FileReader] reader
         FileMetaData _metadata
         shared_ptr[CRandomAccessFile] rd_handle
 
@@ -1334,7 +1335,7 @@ cdef class ParquetReader(_Weakrefable):
             vector[int] c_row_groups
             vector[int] c_column_indices
             shared_ptr[CRecordBatch] record_batch
-            unique_ptr[CRecordBatchReader] recordbatchreader
+            UniquePtrNoGIL[CRecordBatchReader] recordbatchreader
 
         self.set_batch_size(batch_size)
 
@@ -1366,7 +1367,6 @@ cdef class ParquetReader(_Weakrefable):
                 check_status(
                     recordbatchreader.get().ReadNext(&record_batch)
                 )
-
             if record_batch.get() == NULL:
                 break
 
diff --git a/python/pyarrow/includes/libarrow_python.pxd b/python/pyarrow/includes/libarrow_python.pxd
index 4d109fc660e08..b8a3041796f97 100644
--- a/python/pyarrow/includes/libarrow_python.pxd
+++ b/python/pyarrow/includes/libarrow_python.pxd
@@ -261,6 +261,14 @@ cdef extern from "arrow/python/common.h" namespace "arrow::py":
     void RestorePyError(const CStatus& status) except *
 
 
+cdef extern from "arrow/python/common.h" namespace "arrow::py" nogil:
+    cdef cppclass SharedPtrNoGIL[T](shared_ptr[T]):
+        # This looks like the only way to satisfy both Cython 2 and Cython 3
+        SharedPtrNoGIL& operator=(...)
+    cdef cppclass UniquePtrNoGIL[T, DELETER=*](unique_ptr[T, DELETER]):
+        UniquePtrNoGIL& operator=(...)
+
+
 cdef extern from "arrow/python/inference.h" namespace "arrow::py":
     c_bool IsPyBool(object o)
     c_bool IsPyInt(object o)
diff --git a/python/pyarrow/ipc.pxi b/python/pyarrow/ipc.pxi
index fcb9eb729ef04..5d20a4f8b72cb 100644
--- a/python/pyarrow/ipc.pxi
+++ b/python/pyarrow/ipc.pxi
@@ -977,7 +977,7 @@ cdef _wrap_record_batch_with_metadata(CRecordBatchWithMetadata c):
 
 cdef class _RecordBatchFileReader(_Weakrefable):
     cdef:
-        shared_ptr[CRecordBatchFileReader] reader
+        SharedPtrNoGIL[CRecordBatchFileReader] reader
         shared_ptr[CRandomAccessFile] file
         CIpcReadOptions options
 
diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd
index 63ebe6aea8233..ae197eca1ca6b 100644
--- a/python/pyarrow/lib.pxd
+++ b/python/pyarrow/lib.pxd
@@ -552,12 +552,12 @@ cdef class CompressedOutputStream(NativeFile):
 
 cdef class _CRecordBatchWriter(_Weakrefable):
     cdef:
-        shared_ptr[CRecordBatchWriter] writer
+        SharedPtrNoGIL[CRecordBatchWriter] writer
 
 
 cdef class RecordBatchReader(_Weakrefable):
     cdef:
-        shared_ptr[CRecordBatchReader] reader
+        SharedPtrNoGIL[CRecordBatchReader] reader
 
 
 cdef class Codec(_Weakrefable):
diff --git a/python/pyarrow/src/arrow/python/common.h b/python/pyarrow/src/arrow/python/common.h
index bc567ef78e83a..4a7886695eadb 100644
--- a/python/pyarrow/src/arrow/python/common.h
+++ b/python/pyarrow/src/arrow/python/common.h
@@ -19,6 +19,7 @@
 
 #include <functional>
 #include <memory>
+#include <optional>
 #include <utility>
 
 #include "arrow/buffer.h"
@@ -134,13 +135,15 @@ class ARROW_PYTHON_EXPORT PyAcquireGIL {
 // A RAII-style helper that releases the GIL until the end of a lexical block
 class ARROW_PYTHON_EXPORT PyReleaseGIL {
  public:
-  PyReleaseGIL() { saved_state_ = PyEval_SaveThread(); }
-
-  ~PyReleaseGIL() { PyEval_RestoreThread(saved_state_); }
+  PyReleaseGIL() : ptr_(PyEval_SaveThread(), &unique_ptr_deleter) {}
 
  private:
-  PyThreadState* saved_state_;
-  ARROW_DISALLOW_COPY_AND_ASSIGN(PyReleaseGIL);
+  static void unique_ptr_deleter(PyThreadState* state) {
+    if (state) {
+      PyEval_RestoreThread(state);
+    }
+  }
+  std::unique_ptr<PyThreadState, decltype(&unique_ptr_deleter)> ptr_;
 };
 
 // A helper to call safely into the Python interpreter from arbitrary C++ code.
@@ -238,6 +241,48 @@ class ARROW_PYTHON_EXPORT OwnedRefNoGIL : public OwnedRef {
   }
 };
 
+template <template <typename...> typename SmartPtr, typename... Ts>
+class SmartPtrNoGIL : public SmartPtr<Ts...> {
+  using Base = SmartPtr<Ts...>;
+
+ public:
+  template <typename... Args>
+  SmartPtrNoGIL(Args&&... args) : Base(std::forward<Args>(args)...) {}
+
+  ~SmartPtrNoGIL() { reset(); }
+
+  template <typename... Args>
+  void reset(Args&&... args) {
+    auto release_guard = optional_gil_release();
+    Base::reset(std::forward<Args>(args)...);
+  }
+
+  template <typename V>
+  SmartPtrNoGIL& operator=(V&& v) {
+    auto release_guard = optional_gil_release();
+    Base::operator=(std::forward<V>(v));
+    return *this;
+  }
+
+ private:
+  // Only release the GIL if we own an object *and* the Python runtime is
+  // valid *and* the GIL is held.
+  std::optional<PyReleaseGIL> optional_gil_release() const {
+    if (this->get() != nullptr && Py_IsInitialized() && PyGILState_Check()) {
+      return PyReleaseGIL();
+    }
+    return {};
+  }
+};
+
+/// \brief A std::shared_ptr<T, ...> subclass that releases the GIL when destroying T
+template <typename... Ts>
+using SharedPtrNoGIL = SmartPtrNoGIL<std::shared_ptr, Ts...>;
+
+/// \brief A std::unique_ptr<T, ...> subclass that releases the GIL when destroying T
+template <typename... Ts>
+using UniquePtrNoGIL = SmartPtrNoGIL<std::unique_ptr, Ts...>;
+
 template <typename Fn>
 struct BoundFunction;
 
diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py
index afc5380b75516..31f24187e3b37 100644
--- a/python/pyarrow/tests/test_csv.py
+++ b/python/pyarrow/tests/test_csv.py
@@ -1970,3 +1970,24 @@ def test_write_csv_decimal(tmpdir, type_factory):
     out = read_csv(tmpdir / "out.csv")
 
     assert out.column('col').cast(type) == table.column('col')
+
+
+def test_read_csv_gil_deadlock():
+    # GH-38676
+    # This test depends on several preconditions:
+    # - the CSV input is a Python file object
+    # - reading the CSV file produces an error
+    data = b"a,b,c"
+
+    class MyBytesIO(io.BytesIO):
+        def read(self, *args):
+            time.sleep(0.001)
+            return super().read(*args)
+
+        def readinto(self, *args):
+            time.sleep(0.001)
+            return super().readinto(*args)
+
+    for i in range(20):
+        with pytest.raises(pa.ArrowInvalid):
+            read_csv(MyBytesIO(data))

From dfdebdd9199e51b92ed372220b7f33b1aab2d37b Mon Sep 17 00:00:00 2001
From: mwish <maplewish117@gmail.com>
Date: Wed, 15 Nov 2023 23:38:34 +0800
Subject: [PATCH 13/23] GH-38503: [Go][Parquet] Style improvement for using
 ArrowColumnWriter (#38581)

### Rationale for this change

Currently, `ArrowColumnWriter` does not appear to have a bug, but its usage is confusing. For nested types, `ArrowColumnWriter` should consider the logic below:

```
  /// 0 foo.bar
  ///       foo.bar.baz           0
  ///       foo.bar.baz2          1
  ///   foo.qux                   2
  /// 1 foo2                      3
  /// 2 foo3                      4
```

The left column is the column index at the root of the `arrow::Schema`. Parquet itself only stores leaf nodes,
so the column id used by Parquet is the one listed on the right.

In `ArrowColumnWriter`, the final argument is the leaf index in Parquet, so the writer should consider
using `leafIdx`. It also needs a `LeafCount` API for getting the leaf count here.

### What changes are included in this PR?

Style enhancement for `LeafCount`, `leafIdx` and usage for `ArrowColumnWriter`

### Are these changes tested?

no

### Are there any user-facing changes?

no

* Closes: #38503

Authored-by: mwish <maplewish117@gmail.com>
Signed-off-by: Matt Topol <zotthewizard@gmail.com>
---
 go/parquet/internal/encoding/levels.go  | 12 ++++++------
 go/parquet/pqarrow/encode_arrow.go      | 10 +++++++---
 go/parquet/pqarrow/encode_arrow_test.go | 12 +++++++++---
 go/parquet/pqarrow/path_builder.go      |  4 ++--
 4 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/go/parquet/internal/encoding/levels.go b/go/parquet/internal/encoding/levels.go
index caf832059334b..2a6dc24933714 100644
--- a/go/parquet/internal/encoding/levels.go
+++ b/go/parquet/internal/encoding/levels.go
@@ -19,6 +19,7 @@ package encoding
 import (
 	"bytes"
 	"encoding/binary"
+	"errors"
 	"fmt"
 	"math/bits"
 
@@ -28,7 +29,6 @@ import (
 	"github.com/apache/arrow/go/v15/parquet"
 	format "github.com/apache/arrow/go/v15/parquet/internal/gen-go/parquet"
 	"github.com/apache/arrow/go/v15/parquet/internal/utils"
-	"golang.org/x/xerrors"
 )
 
 // LevelEncoder is for handling the encoding of Definition and Repetition levels
@@ -194,12 +194,12 @@ func (l *LevelDecoder) SetData(encoding parquet.Encoding, maxLvl int16, nbuffere
 	switch encoding {
 	case parquet.Encodings.RLE:
 		if len(data) < 4 {
-			return 0, xerrors.New("parquet: received invalid levels (corrupt data page?)")
+			return 0, errors.New("parquet: received invalid levels (corrupt data page?)")
 		}
 
 		nbytes := int32(binary.LittleEndian.Uint32(data[:4]))
 		if nbytes < 0 || nbytes > int32(len(data)-4) {
-			return 0, xerrors.New("parquet: received invalid number of bytes (corrupt data page?)")
+			return 0, errors.New("parquet: received invalid number of bytes (corrupt data page?)")
 		}
 
 		buf := data[4:]
@@ -212,12 +212,12 @@ func (l *LevelDecoder) SetData(encoding parquet.Encoding, maxLvl int16, nbuffere
 	case parquet.Encodings.BitPacked:
 		nbits, ok := overflow.Mul(nbuffered, l.bitWidth)
 		if !ok {
-			return 0, xerrors.New("parquet: number of buffered values too large (corrupt data page?)")
+			return 0, errors.New("parquet: number of buffered values too large (corrupt data page?)")
 		}
 
 		nbytes := bitutil.BytesForBits(int64(nbits))
 		if nbytes < 0 || nbytes > int64(len(data)) {
-			return 0, xerrors.New("parquet: recieved invalid number of bytes (corrupt data page?)")
+			return 0, errors.New("parquet: received invalid number of bytes (corrupt data page?)")
 		}
 		if l.bit == nil {
 			l.bit = utils.NewBitReader(bytes.NewReader(data))
@@ -234,7 +234,7 @@ func (l *LevelDecoder) SetData(encoding parquet.Encoding, maxLvl int16, nbuffere
 // run length encoding.
 func (l *LevelDecoder) SetDataV2(nbytes int32, maxLvl int16, nbuffered int, data []byte) error {
 	if nbytes < 0 {
-		return xerrors.New("parquet: invalid page header (corrupt data page?)")
+		return errors.New("parquet: invalid page header (corrupt data page?)")
 	}
 
 	l.maxLvl = maxLvl
diff --git a/go/parquet/pqarrow/encode_arrow.go b/go/parquet/pqarrow/encode_arrow.go
index 1855d3625adb7..4989837cd03bc 100644
--- a/go/parquet/pqarrow/encode_arrow.go
+++ b/go/parquet/pqarrow/encode_arrow.go
@@ -81,7 +81,7 @@ type ArrowColumnWriter struct {
 //
 // Using an arrow column writer is a convenience to avoid having to process the arrow array yourself
 // and determine the correct definition and repetition levels manually.
-func NewArrowColumnWriter(data *arrow.Chunked, offset, size int64, manifest *SchemaManifest, rgw file.RowGroupWriter, col int) (ArrowColumnWriter, error) {
+func NewArrowColumnWriter(data *arrow.Chunked, offset, size int64, manifest *SchemaManifest, rgw file.RowGroupWriter, leafColIdx int) (ArrowColumnWriter, error) {
 	if data.Len() == 0 {
 		return ArrowColumnWriter{leafCount: calcLeafCount(data.DataType()), rgw: rgw}, nil
 	}
@@ -118,7 +118,7 @@ func NewArrowColumnWriter(data *arrow.Chunked, offset, size int64, manifest *Sch
 	// which is the one this instance will start writing for
 	// colIdx := rgw.CurrentColumn() + 1
 
-	schemaField, err := manifest.GetColumnField(col)
+	schemaField, err := manifest.GetColumnField(leafColIdx)
 	if err != nil {
 		return ArrowColumnWriter{}, err
 	}
@@ -153,7 +153,11 @@ func NewArrowColumnWriter(data *arrow.Chunked, offset, size int64, manifest *Sch
 		values += chunkWriteSize
 	}
 
-	return ArrowColumnWriter{builders: builders, leafCount: leafCount, rgw: rgw, colIdx: col}, nil
+	return ArrowColumnWriter{builders: builders, leafCount: leafCount, rgw: rgw, colIdx: leafColIdx}, nil
+}
+
+func (acw *ArrowColumnWriter) LeafCount() int {
+	return acw.leafCount
 }
 
 func (acw *ArrowColumnWriter) Write(ctx context.Context) error {
diff --git a/go/parquet/pqarrow/encode_arrow_test.go b/go/parquet/pqarrow/encode_arrow_test.go
index d588aff701f3d..712a003c63ad6 100644
--- a/go/parquet/pqarrow/encode_arrow_test.go
+++ b/go/parquet/pqarrow/encode_arrow_test.go
@@ -145,10 +145,12 @@ func TestWriteArrowCols(t *testing.T) {
 	srgw := writer.AppendRowGroup()
 	ctx := pqarrow.NewArrowWriteContext(context.TODO(), nil)
 
+	colIdx := 0
 	for i := int64(0); i < tbl.NumCols(); i++ {
-		acw, err := pqarrow.NewArrowColumnWriter(tbl.Column(int(i)).Data(), 0, tbl.NumRows(), manifest, srgw, int(i))
+		acw, err := pqarrow.NewArrowColumnWriter(tbl.Column(int(i)).Data(), 0, tbl.NumRows(), manifest, srgw, colIdx)
 		require.NoError(t, err)
 		require.NoError(t, acw.Write(ctx))
+		colIdx = colIdx + acw.LeafCount()
 	}
 	require.NoError(t, srgw.Close())
 	require.NoError(t, writer.Close())
@@ -249,10 +251,12 @@ func TestWriteArrowInt96(t *testing.T) {
 	srgw := writer.AppendRowGroup()
 	ctx := pqarrow.NewArrowWriteContext(context.TODO(), &props)
 
+	colIdx := 0
 	for i := int64(0); i < tbl.NumCols(); i++ {
-		acw, err := pqarrow.NewArrowColumnWriter(tbl.Column(int(i)).Data(), 0, tbl.NumRows(), manifest, srgw, int(i))
+		acw, err := pqarrow.NewArrowColumnWriter(tbl.Column(int(i)).Data(), 0, tbl.NumRows(), manifest, srgw, colIdx)
 		require.NoError(t, err)
 		require.NoError(t, acw.Write(ctx))
+		colIdx += acw.LeafCount()
 	}
 	require.NoError(t, srgw.Close())
 	require.NoError(t, writer.Close())
@@ -306,11 +310,13 @@ func writeTableToBuffer(t *testing.T, mem memory.Allocator, tbl arrow.Table, row
 	for offset < tbl.NumRows() {
 		sz := utils.Min(rowGroupSize, tbl.NumRows()-offset)
 		srgw := writer.AppendRowGroup()
+		colIdx := 0
 		for i := 0; i < int(tbl.NumCols()); i++ {
 			col := tbl.Column(i)
-			acw, err := pqarrow.NewArrowColumnWriter(col.Data(), offset, sz, manifest, srgw, i)
+			acw, err := pqarrow.NewArrowColumnWriter(col.Data(), offset, sz, manifest, srgw, colIdx)
 			require.NoError(t, err)
 			require.NoError(t, acw.Write(ctx))
+			colIdx = colIdx + acw.LeafCount()
 		}
 		srgw.Close()
 		offset += sz
diff --git a/go/parquet/pqarrow/path_builder.go b/go/parquet/pqarrow/path_builder.go
index 0f1158bd1e9f0..57a077956edea 100644
--- a/go/parquet/pqarrow/path_builder.go
+++ b/go/parquet/pqarrow/path_builder.go
@@ -206,7 +206,7 @@ func (n *listNode) fillForLast(rng, childRng *elemRange, ctx *pathWriteCtx) iter
 	fillRepLevels(int(childRng.size()), n.repLevel, ctx)
 	// once we've reached this point the following preconditions should hold:
 	// 1. there are no more repeated path nodes to deal with
-	// 2. all elements in |range| reperesent contiguous elements in the child
+	// 2. all elements in |range| represent contiguous elements in the child
 	//    array (null values would have shortened the range to ensure all
 	//    remaining list elements are present, though they may be empty)
 	// 3. no element of range spans a parent list (intermediate list nodes
@@ -225,7 +225,7 @@ func (n *listNode) fillForLast(rng, childRng *elemRange, ctx *pathWriteCtx) iter
 
 		// this is the start of a new list. we can be sure that it only applies to the
 		// previous list (and doesn't jump to the start of any list further up in nesting
-		// due to the contraints mentioned earlier)
+		// due to the constraints mentioned earlier)
 		ctx.AppendRepLevel(n.prevRepLevel)
 		ctx.AppendRepLevels(int(sizeCheck.size())-1, n.repLevel)
 		childRng.end = sizeCheck.end

From 41e45fed07a6466b1f8e9437220c9bf49c4a8cb3 Mon Sep 17 00:00:00 2001
From: Yue <niyue.com@gmail.com>
Date: Thu, 16 Nov 2023 00:22:36 +0800
Subject: [PATCH 14/23] GH-38697: [C++][Gandiva] Use arrow io util to replace
 std::filesystem::path in gandiva (#38698)

### Rationale for this change
AlmaLinux 8 CI reported a linker failure when `std::filesystem::path` is used, and this PR tries to fix it.

### What changes are included in this PR?
Replace `std::filesystem::path` in gandiva with arrow's internal io util so that the AlmaLinux 8 CI build can work.

### Are these changes tested?
It should be covered by existing tests and CI.

### Are there any user-facing changes?
No
* Closes: #38697

Lead-authored-by: Yue Ni <niyue.com@gmail.com>
Co-authored-by: Yue <niyue.com@gmail.com>
Co-authored-by: Antoine Pitrou <antoine@python.org>
Co-authored-by: Antoine Pitrou <pitrou@free.fr>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/gandiva/tests/test_util.cc | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/cpp/src/gandiva/tests/test_util.cc b/cpp/src/gandiva/tests/test_util.cc
index 42f67d3824a21..4a0a15c7223db 100644
--- a/cpp/src/gandiva/tests/test_util.cc
+++ b/cpp/src/gandiva/tests/test_util.cc
@@ -17,12 +17,12 @@
 
 #include "gandiva/tests/test_util.h"
 
-#include <filesystem>
+#include "arrow/util/io_util.h"
+#include "arrow/util/logging.h"
 
 namespace gandiva {
 std::shared_ptr<Configuration> TestConfiguration() {
-  auto builder = ConfigurationBuilder();
-  return builder.DefaultConfiguration();
+  return ConfigurationBuilder::DefaultConfiguration();
 }
 
 #ifndef GANDIVA_EXTENSION_TEST_DIR
@@ -30,9 +30,10 @@ std::shared_ptr<Configuration> TestConfiguration() {
 #endif
 
 std::string GetTestFunctionLLVMIRPath() {
-  std::filesystem::path base(GANDIVA_EXTENSION_TEST_DIR);
-  std::filesystem::path ir_file = base / "multiply_by_two.bc";
-  return ir_file.string();
+  const auto base =
+      arrow::internal::PlatformFilename::FromString(GANDIVA_EXTENSION_TEST_DIR);
+  DCHECK_OK(base.status());
+  return base->Join("multiply_by_two.bc")->ToString();
 }
 
 NativeFunction GetTestExternalFunction() {

From 1e7175db8d78313935cd1161728e9ae9dae57c9c Mon Sep 17 00:00:00 2001
From: Tim Schaub <tschaub@users.noreply.github.com>
Date: Wed, 15 Nov 2023 10:16:11 -0700
Subject: [PATCH 15/23] GH-38503: [Go][Parquet] Make the arrow column writer
 internal (#38727)

This makes it so the Arrow column writer is not exported from the `pqarrow` package.  This follows up on comments from #38581.
* Closes: #38503

Authored-by: Tim Schaub <tim@planet.com>
Signed-off-by: Matt Topol <zotthewizard@gmail.com>
---
 go/parquet/pqarrow/encode_arrow.go      | 26 ++++----
 go/parquet/pqarrow/encode_arrow_test.go | 80 ++++++++++---------------
 go/parquet/pqarrow/file_writer.go       |  2 +-
 3 files changed, 44 insertions(+), 64 deletions(-)

diff --git a/go/parquet/pqarrow/encode_arrow.go b/go/parquet/pqarrow/encode_arrow.go
index 4989837cd03bc..8926d0ba51a07 100644
--- a/go/parquet/pqarrow/encode_arrow.go
+++ b/go/parquet/pqarrow/encode_arrow.go
@@ -65,25 +65,25 @@ func nullableRoot(manifest *SchemaManifest, field *SchemaField) bool {
 	return nullable
 }
 
-// ArrowColumnWriter is a convenience object for easily writing arrow data to a specific
+// arrowColumnWriter is a convenience object for easily writing arrow data to a specific
 // set of columns in a parquet file. Since a single arrow array can itself be a nested type
 // consisting of multiple columns of data, this will write to all of the appropriate leaves in
 // the parquet file, allowing easy writing of nested columns.
-type ArrowColumnWriter struct {
+type arrowColumnWriter struct {
 	builders  []*multipathLevelBuilder
 	leafCount int
 	colIdx    int
 	rgw       file.RowGroupWriter
 }
 
-// NewArrowColumnWriter returns a new writer using the chunked array to determine the number of leaf columns,
+// newArrowColumnWriter returns a new writer using the chunked array to determine the number of leaf columns,
 // and the provided schema manifest to determine the paths for writing the columns.
 //
 // Using an arrow column writer is a convenience to avoid having to process the arrow array yourself
 // and determine the correct definition and repetition levels manually.
-func NewArrowColumnWriter(data *arrow.Chunked, offset, size int64, manifest *SchemaManifest, rgw file.RowGroupWriter, leafColIdx int) (ArrowColumnWriter, error) {
+func newArrowColumnWriter(data *arrow.Chunked, offset, size int64, manifest *SchemaManifest, rgw file.RowGroupWriter, leafColIdx int) (arrowColumnWriter, error) {
 	if data.Len() == 0 {
-		return ArrowColumnWriter{leafCount: calcLeafCount(data.DataType()), rgw: rgw}, nil
+		return arrowColumnWriter{leafCount: calcLeafCount(data.DataType()), rgw: rgw}, nil
 	}
 
 	var (
@@ -109,7 +109,7 @@ func NewArrowColumnWriter(data *arrow.Chunked, offset, size int64, manifest *Sch
 	}
 
 	if absPos >= int64(data.Len()) {
-		return ArrowColumnWriter{}, errors.New("cannot write data at offset past end of chunked array")
+		return arrowColumnWriter{}, errors.New("cannot write data at offset past end of chunked array")
 	}
 
 	leafCount := calcLeafCount(data.DataType())
@@ -120,7 +120,7 @@ func NewArrowColumnWriter(data *arrow.Chunked, offset, size int64, manifest *Sch
 
 	schemaField, err := manifest.GetColumnField(leafColIdx)
 	if err != nil {
-		return ArrowColumnWriter{}, err
+		return arrowColumnWriter{}, err
 	}
 	isNullable = nullableRoot(manifest, schemaField)
 
@@ -138,10 +138,10 @@ func NewArrowColumnWriter(data *arrow.Chunked, offset, size int64, manifest *Sch
 		if arrToWrite.Len() > 0 {
 			bldr, err := newMultipathLevelBuilder(arrToWrite, isNullable)
 			if err != nil {
-				return ArrowColumnWriter{}, nil
+				return arrowColumnWriter{}, nil
 			}
 			if leafCount != bldr.leafCount() {
-				return ArrowColumnWriter{}, fmt.Errorf("data type leaf_count != builder leafcount: %d - %d", leafCount, bldr.leafCount())
+				return arrowColumnWriter{}, fmt.Errorf("data type leaf_count != builder leafcount: %d - %d", leafCount, bldr.leafCount())
 			}
 			builders = append(builders, bldr)
 		}
@@ -153,14 +153,10 @@ func NewArrowColumnWriter(data *arrow.Chunked, offset, size int64, manifest *Sch
 		values += chunkWriteSize
 	}
 
-	return ArrowColumnWriter{builders: builders, leafCount: leafCount, rgw: rgw, colIdx: leafColIdx}, nil
+	return arrowColumnWriter{builders: builders, leafCount: leafCount, rgw: rgw, colIdx: leafColIdx}, nil
 }
 
-func (acw *ArrowColumnWriter) LeafCount() int {
-	return acw.leafCount
-}
-
-func (acw *ArrowColumnWriter) Write(ctx context.Context) error {
+func (acw *arrowColumnWriter) Write(ctx context.Context) error {
 	arrCtx := arrowCtxFromContext(ctx)
 	for leafIdx := 0; leafIdx < acw.leafCount; leafIdx++ {
 		var (
diff --git a/go/parquet/pqarrow/encode_arrow_test.go b/go/parquet/pqarrow/encode_arrow_test.go
index 712a003c63ad6..95ea644dd8013 100644
--- a/go/parquet/pqarrow/encode_arrow_test.go
+++ b/go/parquet/pqarrow/encode_arrow_test.go
@@ -132,28 +132,24 @@ func TestWriteArrowCols(t *testing.T) {
 	tbl := makeDateTimeTypesTable(mem, false, false)
 	defer tbl.Release()
 
-	psc, err := pqarrow.ToParquet(tbl.Schema(), nil, pqarrow.NewArrowWriterProperties(pqarrow.WithAllocator(mem)))
-	require.NoError(t, err)
-
-	manifest, err := pqarrow.NewSchemaManifest(psc, nil, nil)
-	require.NoError(t, err)
-
 	sink := encoding.NewBufferWriter(0, mem)
 	defer sink.Release()
-	writer := file.NewParquetWriter(sink, psc.Root(), file.WithWriterProps(parquet.NewWriterProperties(parquet.WithVersion(parquet.V2_4))))
 
-	srgw := writer.AppendRowGroup()
-	ctx := pqarrow.NewArrowWriteContext(context.TODO(), nil)
+	fileWriter, err := pqarrow.NewFileWriter(
+		tbl.Schema(),
+		sink,
+		parquet.NewWriterProperties(parquet.WithVersion(parquet.V2_4)),
+		pqarrow.NewArrowWriterProperties(pqarrow.WithAllocator(mem)),
+	)
+	require.NoError(t, err)
 
-	colIdx := 0
+	fileWriter.NewRowGroup()
 	for i := int64(0); i < tbl.NumCols(); i++ {
-		acw, err := pqarrow.NewArrowColumnWriter(tbl.Column(int(i)).Data(), 0, tbl.NumRows(), manifest, srgw, colIdx)
+		colChunk := tbl.Column(int(i)).Data()
+		err := fileWriter.WriteColumnChunked(colChunk, 0, int64(colChunk.Len()))
 		require.NoError(t, err)
-		require.NoError(t, acw.Write(ctx))
-		colIdx = colIdx + acw.LeafCount()
 	}
-	require.NoError(t, srgw.Close())
-	require.NoError(t, writer.Close())
+	require.NoError(t, fileWriter.Close())
 
 	expected := makeDateTimeTypesTable(mem, true, false)
 	defer expected.Release()
@@ -235,31 +231,24 @@ func TestWriteArrowInt96(t *testing.T) {
 	tbl := makeDateTimeTypesTable(mem, false, false)
 	defer tbl.Release()
 
-	props := pqarrow.NewArrowWriterProperties(pqarrow.WithDeprecatedInt96Timestamps(true), pqarrow.WithAllocator(mem))
-
-	psc, err := pqarrow.ToParquet(tbl.Schema(), nil, props)
-	require.NoError(t, err)
-
-	manifest, err := pqarrow.NewSchemaManifest(psc, nil, nil)
-	require.NoError(t, err)
-
 	sink := encoding.NewBufferWriter(0, mem)
 	defer sink.Release()
 
-	writer := file.NewParquetWriter(sink, psc.Root(), file.WithWriterProps(parquet.NewWriterProperties(parquet.WithAllocator(mem))))
-
-	srgw := writer.AppendRowGroup()
-	ctx := pqarrow.NewArrowWriteContext(context.TODO(), &props)
+	fileWriter, err := pqarrow.NewFileWriter(
+		tbl.Schema(),
+		sink,
+		parquet.NewWriterProperties(parquet.WithAllocator(mem)),
+		pqarrow.NewArrowWriterProperties(pqarrow.WithDeprecatedInt96Timestamps(true), pqarrow.WithAllocator(mem)),
+	)
+	require.NoError(t, err)
 
-	colIdx := 0
+	fileWriter.NewRowGroup()
 	for i := int64(0); i < tbl.NumCols(); i++ {
-		acw, err := pqarrow.NewArrowColumnWriter(tbl.Column(int(i)).Data(), 0, tbl.NumRows(), manifest, srgw, colIdx)
+		colChunk := tbl.Column(int(i)).Data()
+		err := fileWriter.WriteColumnChunked(colChunk, 0, int64(colChunk.Len()))
 		require.NoError(t, err)
-		require.NoError(t, acw.Write(ctx))
-		colIdx += acw.LeafCount()
 	}
-	require.NoError(t, srgw.Close())
-	require.NoError(t, writer.Close())
+	require.NoError(t, fileWriter.Close())
 
 	expected := makeDateTimeTypesTable(mem, false, false)
 	defer expected.Release()
@@ -296,33 +285,28 @@ func TestWriteArrowInt96(t *testing.T) {
 func writeTableToBuffer(t *testing.T, mem memory.Allocator, tbl arrow.Table, rowGroupSize int64, props pqarrow.ArrowWriterProperties) *memory.Buffer {
 	sink := encoding.NewBufferWriter(0, mem)
 	defer sink.Release()
-	wrprops := parquet.NewWriterProperties(parquet.WithVersion(parquet.V1_0))
-	psc, err := pqarrow.ToParquet(tbl.Schema(), wrprops, props)
-	require.NoError(t, err)
 
-	manifest, err := pqarrow.NewSchemaManifest(psc, nil, nil)
+	fileWriter, err := pqarrow.NewFileWriter(
+		tbl.Schema(),
+		sink,
+		parquet.NewWriterProperties(parquet.WithVersion(parquet.V1_0)),
+		props,
+	)
 	require.NoError(t, err)
 
-	writer := file.NewParquetWriter(sink, psc.Root(), file.WithWriterProps(wrprops))
-	ctx := pqarrow.NewArrowWriteContext(context.TODO(), &props)
-
 	offset := int64(0)
 	for offset < tbl.NumRows() {
 		sz := utils.Min(rowGroupSize, tbl.NumRows()-offset)
-		srgw := writer.AppendRowGroup()
-		colIdx := 0
+		fileWriter.NewRowGroup()
 		for i := 0; i < int(tbl.NumCols()); i++ {
-			col := tbl.Column(i)
-			acw, err := pqarrow.NewArrowColumnWriter(col.Data(), offset, sz, manifest, srgw, colIdx)
+			colChunk := tbl.Column(i).Data()
+			err := fileWriter.WriteColumnChunked(colChunk, 0, int64(colChunk.Len()))
 			require.NoError(t, err)
-			require.NoError(t, acw.Write(ctx))
-			colIdx = colIdx + acw.LeafCount()
 		}
-		srgw.Close()
 		offset += sz
 	}
-	writer.Close()
 
+	require.NoError(t, fileWriter.Close())
 	return sink.Finish()
 }
 
diff --git a/go/parquet/pqarrow/file_writer.go b/go/parquet/pqarrow/file_writer.go
index 21f16c0b67938..bc484ba243f87 100644
--- a/go/parquet/pqarrow/file_writer.go
+++ b/go/parquet/pqarrow/file_writer.go
@@ -305,7 +305,7 @@ func (fw *FileWriter) Close() error {
 // building of writing columns to a file via arrow data without needing to already have
 // a record or table.
 func (fw *FileWriter) WriteColumnChunked(data *arrow.Chunked, offset, size int64) error {
-	acw, err := NewArrowColumnWriter(data, offset, size, fw.manifest, fw.rgw, fw.colIdx)
+	acw, err := newArrowColumnWriter(data, offset, size, fw.manifest, fw.rgw, fw.colIdx)
 	if err != nil {
 		return err
 	}

From 563078fb70f7d23e716a2c1c79e96f7409c02f3f Mon Sep 17 00:00:00 2001
From: James Duong <james.duong@improving.com>
Date: Wed, 15 Nov 2023 11:49:41 -0800
Subject: [PATCH 16/23] GH-38318: [Java][FlightRPC] Enable tests that leaked
 (#38719)

### Rationale for this change
This enables tests that are currently disabled to improve coverage and help others build tests based on these.

### What changes are included in this PR?
- Enable tests that were disabled due to flaky memory leaks
- Explicitly close child allocators in these tests to match
  the behavior of FlightServerTestRule which does not leak.
- Change TestBasicAuth to allocate only one server
- Change TestBasicAuth2 to allocate only one server and client
- Fix a bug in TestBasicAuth2#asyncCall() not including credentials

### Are these changes tested?
Tested locally.

### Are there any user-facing changes?
No.
* Closes: #38318

Authored-by: James Duong <james.duong@improving.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 .../arrow/flight/auth/TestBasicAuth.java      | 29 +++++++++----
 .../arrow/flight/auth2/TestBasicAuth2.java    | 42 +++++++++----------
 .../arrow/flight/TestFlightSqlStreams.java    | 12 +++---
 3 files changed, 46 insertions(+), 37 deletions(-)

diff --git a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/auth/TestBasicAuth.java b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/auth/TestBasicAuth.java
index 6544b23dab635..176277866b73a 100644
--- a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/auth/TestBasicAuth.java
+++ b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/auth/TestBasicAuth.java
@@ -41,10 +41,11 @@
 import org.apache.arrow.vector.types.Types;
 import org.apache.arrow.vector.types.pojo.Field;
 import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.jupiter.api.AfterAll;
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;
 
 import com.google.common.collect.ImmutableList;
@@ -56,8 +57,8 @@ public class TestBasicAuth {
   private static final byte[] VALID_TOKEN = "my_token".getBytes(StandardCharsets.UTF_8);
 
   private FlightClient client;
-  private FlightServer server;
-  private BufferAllocator allocator;
+  private static FlightServer server;
+  private static BufferAllocator allocator;
 
   @Test
   public void validAuth() {
@@ -65,8 +66,6 @@ public void validAuth() {
     Assertions.assertTrue(ImmutableList.copyOf(client.listFlights(Criteria.ALL)).size() == 0);
   }
 
-  // ARROW-7722: this test occasionally leaks memory
-  @Disabled
   @Test
   public void asyncCall() throws Exception {
     client.authenticateBasic(USERNAME, PASSWORD);
@@ -97,7 +96,12 @@ public void didntAuth() {
   }
 
   @BeforeEach
-  public void setup() throws IOException {
+  public void testSetup() throws IOException {
+    client = FlightClient.builder(allocator, server.getLocation()).build();
+  }
+
+  @BeforeAll
+  public static void setup() throws IOException {
     allocator = new RootAllocator(Long.MAX_VALUE);
     final BasicServerAuthHandler.BasicAuthValidator validator = new BasicServerAuthHandler.BasicAuthValidator() {
 
@@ -149,12 +153,19 @@ public void getStream(CallContext context, Ticket ticket, ServerStreamListener l
             }
           }
         }).authHandler(new BasicServerAuthHandler(validator)).build().start();
-    client = FlightClient.builder(allocator, server.getLocation()).build();
   }
 
   @AfterEach
-  public void shutdown() throws Exception {
-    AutoCloseables.close(client, server, allocator);
+  public void tearDown() throws Exception {
+    AutoCloseables.close(client);
+  }
+
+  @AfterAll
+  public static void shutdown() throws Exception {
+    AutoCloseables.close(server);
+
+    allocator.getChildAllocators().forEach(BufferAllocator::close);
+    AutoCloseables.close(allocator);
   }
 
 }
diff --git a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/auth2/TestBasicAuth2.java b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/auth2/TestBasicAuth2.java
index 4ccc73fcac10e..cadd67d3ed241 100644
--- a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/auth2/TestBasicAuth2.java
+++ b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/auth2/TestBasicAuth2.java
@@ -41,10 +41,9 @@
 import org.apache.arrow.vector.types.Types;
 import org.apache.arrow.vector.types.pojo.Field;
 import org.apache.arrow.vector.types.pojo.Schema;
-import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.AfterAll;
 import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Test;
 
 import com.google.common.base.Strings;
@@ -57,18 +56,18 @@ public class TestBasicAuth2 {
   private static final String NO_USERNAME = "";
   private static final String PASSWORD_1 = "woohoo1";
   private static final String PASSWORD_2 = "woohoo2";
-  private BufferAllocator allocator;
-  private FlightServer server;
-  private FlightClient client;
-  private FlightClient client2;
+  private static BufferAllocator allocator;
+  private static FlightServer server;
+  private static FlightClient client;
+  private static FlightClient client2;
 
-  @BeforeEach
-  public void setup() throws Exception {
+  @BeforeAll
+  public static void setup() throws Exception {
     allocator = new RootAllocator(Long.MAX_VALUE);
     startServerAndClient();
   }
 
-  private FlightProducer getFlightProducer() {
+  private static FlightProducer getFlightProducer() {
     return new NoOpFlightProducer() {
       @Override
       public void listFlights(CallContext context, Criteria criteria,
@@ -99,23 +98,26 @@ public void getStream(CallContext context, Ticket ticket, ServerStreamListener l
     };
   }
 
-  private void startServerAndClient() throws IOException {
+  private static void startServerAndClient() throws IOException {
     final FlightProducer flightProducer = getFlightProducer();
-    this.server = FlightServer
+    server = FlightServer
         .builder(allocator, forGrpcInsecure(LOCALHOST, 0), flightProducer)
         .headerAuthenticator(new GeneratedBearerTokenAuthenticator(
-            new BasicCallHeaderAuthenticator(this::validate)))
+            new BasicCallHeaderAuthenticator(TestBasicAuth2::validate)))
         .build().start();
-    this.client = FlightClient.builder(allocator, server.getLocation())
+    client = FlightClient.builder(allocator, server.getLocation())
         .build();
   }
 
-  @AfterEach
-  public void shutdown() throws Exception {
-    AutoCloseables.close(client, client2, server, allocator);
+  @AfterAll
+  public static void shutdown() throws Exception {
+    AutoCloseables.close(client, client2, server);
     client = null;
     client2 = null;
     server = null;
+
+    allocator.getChildAllocators().forEach(BufferAllocator::close);
+    AutoCloseables.close(allocator);
     allocator = null;
   }
 
@@ -124,7 +126,7 @@ private void startClient2() throws IOException {
         .build();
   }
 
-  private CallHeaderAuthenticator.AuthResult validate(String username, String password) {
+  private static CallHeaderAuthenticator.AuthResult validate(String username, String password) {
     if (Strings.isNullOrEmpty(username)) {
       throw CallStatus.UNAUTHENTICATED.withDescription("Credentials not supplied.").toRuntimeException();
     }
@@ -156,14 +158,12 @@ public void validAuthWithMultipleClientsWithDifferentCredentialsWithBearerAuthSe
     testValidAuthWithMultipleClientsWithDifferentCredentials(client, client2);
   }
 
-  // ARROW-7722: this test occasionally leaks memory
-  @Disabled
   @Test
   public void asyncCall() throws Exception {
     final CredentialCallOption bearerToken = client
         .authenticateBasicToken(USERNAME_1, PASSWORD_1).get();
     client.listFlights(Criteria.ALL, bearerToken);
-    try (final FlightStream s = client.getStream(new Ticket(new byte[1]))) {
+    try (final FlightStream s = client.getStream(new Ticket(new byte[1]), bearerToken)) {
       while (s.next()) {
         Assertions.assertEquals(4095, s.getRoot().getRowCount());
       }
diff --git a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/TestFlightSqlStreams.java b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/TestFlightSqlStreams.java
index 11d00742fd243..1dd925eb53add 100644
--- a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/TestFlightSqlStreams.java
+++ b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/TestFlightSqlStreams.java
@@ -46,7 +46,6 @@
 import org.junit.jupiter.api.AfterAll;
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.BeforeAll;
-import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;
 
 import com.google.common.collect.ImmutableList;
@@ -209,10 +208,13 @@ public static void setUp() throws Exception {
 
   @AfterAll
   public static void tearDown() throws Exception {
-    close(sqlClient, server, allocator);
+    close(sqlClient, server);
+
+    // Manually close all child allocators.
+    allocator.getChildAllocators().forEach(BufferAllocator::close);
+    close(allocator);
   }
 
-  @Disabled("Memory leak GH-38268")
   @Test
   public void testGetTablesResultNoSchema() throws Exception {
     try (final FlightStream stream =
@@ -232,7 +234,6 @@ public void testGetTablesResultNoSchema() throws Exception {
     }
   }
 
-  @Disabled("Memory leak GH-38268")
   @Test
   public void testGetTableTypesResult() throws Exception {
     try (final FlightStream stream =
@@ -251,7 +252,6 @@ public void testGetTableTypesResult() throws Exception {
     }
   }
 
-  @Disabled("Memory leak GH-38268")
   @Test
   public void testGetSqlInfoResults() throws Exception {
     final FlightInfo info = sqlClient.getSqlInfo();
@@ -263,7 +263,6 @@ public void testGetSqlInfoResults() throws Exception {
     }
   }
 
-  @Disabled("Memory leak GH-38268")
   @Test
   public void testGetTypeInfo() throws Exception {
     FlightInfo flightInfo = sqlClient.getXdbcTypeInfo();
@@ -280,7 +279,6 @@ public void testGetTypeInfo() throws Exception {
     }
   }
 
-  @Disabled("Memory leak GH-38268")
   @Test
   public void testExecuteQuery() throws Exception {
     try (final FlightStream stream = sqlClient

From 0e52d308c567b400d6599f2f2b238a3520b817bb Mon Sep 17 00:00:00 2001
From: Francis <455954986@qq.com>
Date: Thu, 16 Nov 2023 05:09:17 +0800
Subject: [PATCH 17/23] GH-38624: [C++] Fix: add TestingEqualOptions for gtest
 functions. (#38642)

### Rationale for this change

Some other testing interfaces lack equal options, such as NaN equality, so we add TestingEqualOptions to each function and add some tests.

### What changes are included in this PR?

gtest_util related.

### Are these changes tested?

Yes, by the new tests in gtest_util_test.cc.

### Are there any user-facing changes?
yes.
* Closes: #38624

Authored-by: light-city <455954986@qq.com>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
---
 cpp/src/arrow/chunked_array.cc           |  13 ++-
 cpp/src/arrow/chunked_array.h            |   6 +-
 cpp/src/arrow/testing/CMakeLists.txt     |   1 +
 cpp/src/arrow/testing/gtest_util.cc      |  55 +++++----
 cpp/src/arrow/testing/gtest_util.h       |  48 ++++----
 cpp/src/arrow/testing/gtest_util_test.cc | 137 +++++++++++++++++++++++
 6 files changed, 207 insertions(+), 53 deletions(-)
 create mode 100644 cpp/src/arrow/testing/gtest_util_test.cc

diff --git a/cpp/src/arrow/chunked_array.cc b/cpp/src/arrow/chunked_array.cc
index 12937406e7800..c36b736d5d5df 100644
--- a/cpp/src/arrow/chunked_array.cc
+++ b/cpp/src/arrow/chunked_array.cc
@@ -86,7 +86,7 @@ Result<std::shared_ptr<ChunkedArray>> ChunkedArray::MakeEmpty(
   return std::make_shared<ChunkedArray>(std::move(new_chunks));
 }
 
-bool ChunkedArray::Equals(const ChunkedArray& other) const {
+bool ChunkedArray::Equals(const ChunkedArray& other, const EqualOptions& opts) const {
   if (length_ != other.length()) {
     return false;
   }
@@ -102,9 +102,9 @@ bool ChunkedArray::Equals(const ChunkedArray& other) const {
   // the underlying data independently of the chunk size.
   return internal::ApplyBinaryChunked(
              *this, other,
-             [](const Array& left_piece, const Array& right_piece,
-                int64_t ARROW_ARG_UNUSED(position)) {
-               if (!left_piece.Equals(right_piece)) {
+             [&](const Array& left_piece, const Array& right_piece,
+                 int64_t ARROW_ARG_UNUSED(position)) {
+               if (!left_piece.Equals(right_piece, opts)) {
                  return Status::Invalid("Unequal piece");
                }
                return Status::OK();
@@ -129,14 +129,15 @@ bool mayHaveNaN(const arrow::DataType& type) {
 
 }  //  namespace
 
-bool ChunkedArray::Equals(const std::shared_ptr<ChunkedArray>& other) const {
+bool ChunkedArray::Equals(const std::shared_ptr<ChunkedArray>& other,
+                          const EqualOptions& opts) const {
   if (!other) {
     return false;
   }
   if (this == other.get() && !mayHaveNaN(*type_)) {
     return true;
   }
-  return Equals(*other.get());
+  return Equals(*other.get(), opts);
 }
 
 bool ChunkedArray::ApproxEquals(const ChunkedArray& other,
diff --git a/cpp/src/arrow/chunked_array.h b/cpp/src/arrow/chunked_array.h
index 6ec7d11ac839d..5d300861d85c2 100644
--- a/cpp/src/arrow/chunked_array.h
+++ b/cpp/src/arrow/chunked_array.h
@@ -152,9 +152,11 @@ class ARROW_EXPORT ChunkedArray {
   ///
   /// Two chunked arrays can be equal only if they have equal datatypes.
   /// However, they may be equal even if they have different chunkings.
-  bool Equals(const ChunkedArray& other) const;
+  bool Equals(const ChunkedArray& other,
+              const EqualOptions& opts = EqualOptions::Defaults()) const;
   /// \brief Determine if two chunked arrays are equal.
-  bool Equals(const std::shared_ptr<ChunkedArray>& other) const;
+  bool Equals(const std::shared_ptr<ChunkedArray>& other,
+              const EqualOptions& opts = EqualOptions::Defaults()) const;
   /// \brief Determine if two chunked arrays approximately equal
   bool ApproxEquals(const ChunkedArray& other,
                     const EqualOptions& = EqualOptions::Defaults()) const;
diff --git a/cpp/src/arrow/testing/CMakeLists.txt b/cpp/src/arrow/testing/CMakeLists.txt
index d5332405964ba..59825f0bf227a 100644
--- a/cpp/src/arrow/testing/CMakeLists.txt
+++ b/cpp/src/arrow/testing/CMakeLists.txt
@@ -19,4 +19,5 @@ arrow_install_all_headers("arrow/testing")
 
 if(ARROW_BUILD_TESTS)
   add_arrow_test(random_test)
+  add_arrow_test(gtest_util_test)
 endif()
diff --git a/cpp/src/arrow/testing/gtest_util.cc b/cpp/src/arrow/testing/gtest_util.cc
index a6dc1d59c67a9..5ef1820d5b581 100644
--- a/cpp/src/arrow/testing/gtest_util.cc
+++ b/cpp/src/arrow/testing/gtest_util.cc
@@ -145,42 +145,46 @@ void AssertScalarsApproxEqual(const Scalar& expected, const Scalar& actual, bool
 }
 
 void AssertBatchesEqual(const RecordBatch& expected, const RecordBatch& actual,
-                        bool check_metadata) {
+                        bool check_metadata, const EqualOptions& options) {
   AssertTsSame(expected, actual,
                [&](const RecordBatch& expected, const RecordBatch& actual) {
-                 return expected.Equals(actual, check_metadata);
+                 return expected.Equals(actual, check_metadata, options);
                });
 }
 
-void AssertBatchesApproxEqual(const RecordBatch& expected, const RecordBatch& actual) {
+void AssertBatchesApproxEqual(const RecordBatch& expected, const RecordBatch& actual,
+                              const EqualOptions& options) {
   AssertTsSame(expected, actual,
                [&](const RecordBatch& expected, const RecordBatch& actual) {
-                 return expected.ApproxEquals(actual);
+                 return expected.ApproxEquals(actual, options);
                });
 }
 
-void AssertChunkedEqual(const ChunkedArray& expected, const ChunkedArray& actual) {
+void AssertChunkedEqual(const ChunkedArray& expected, const ChunkedArray& actual,
+                        const EqualOptions& options) {
   ASSERT_EQ(expected.num_chunks(), actual.num_chunks()) << "# chunks unequal";
-  if (!actual.Equals(expected)) {
+  if (!actual.Equals(expected, options)) {
     std::stringstream diff;
     for (int i = 0; i < actual.num_chunks(); ++i) {
       auto c1 = actual.chunk(i);
       auto c2 = expected.chunk(i);
       diff << "# chunk " << i << std::endl;
-      ARROW_IGNORE_EXPR(c1->Equals(c2, EqualOptions().diff_sink(&diff)));
+      ARROW_IGNORE_EXPR(c1->Equals(c2, options.diff_sink(&diff)));
     }
     FAIL() << diff.str();
   }
 }
 
-void AssertChunkedEqual(const ChunkedArray& actual, const ArrayVector& expected) {
-  AssertChunkedEqual(ChunkedArray(expected, actual.type()), actual);
+void AssertChunkedEqual(const ChunkedArray& actual, const ArrayVector& expected,
+                        const EqualOptions& options) {
+  AssertChunkedEqual(ChunkedArray(expected, actual.type()), actual, options);
 }
 
-void AssertChunkedEquivalent(const ChunkedArray& expected, const ChunkedArray& actual) {
+void AssertChunkedEquivalent(const ChunkedArray& expected, const ChunkedArray& actual,
+                             const EqualOptions& options) {
   // XXX: AssertChunkedEqual in gtest_util.h does not permit the chunk layouts
   // to be different
-  if (!actual.Equals(expected)) {
+  if (!actual.Equals(expected, options)) {
     std::stringstream pp_expected;
     std::stringstream pp_actual;
     ::arrow::PrettyPrintOptions options(/*indent=*/2);
@@ -321,21 +325,23 @@ ASSERT_EQUAL_IMPL(Field, Field, "fields")
 ASSERT_EQUAL_IMPL(Schema, Schema, "schemas")
 #undef ASSERT_EQUAL_IMPL
 
-void AssertDatumsEqual(const Datum& expected, const Datum& actual, bool verbose) {
+void AssertDatumsEqual(const Datum& expected, const Datum& actual, bool verbose,
+                       const EqualOptions& options) {
   ASSERT_EQ(expected.kind(), actual.kind())
       << "expected:" << expected.ToString() << " got:" << actual.ToString();
 
   switch (expected.kind()) {
     case Datum::SCALAR:
-      AssertScalarsEqual(*expected.scalar(), *actual.scalar(), verbose);
+      AssertScalarsEqual(*expected.scalar(), *actual.scalar(), verbose, options);
       break;
     case Datum::ARRAY: {
       auto expected_array = expected.make_array();
       auto actual_array = actual.make_array();
-      AssertArraysEqual(*expected_array, *actual_array, verbose);
+      AssertArraysEqual(*expected_array, *actual_array, verbose, options);
     } break;
     case Datum::CHUNKED_ARRAY:
-      AssertChunkedEquivalent(*expected.chunked_array(), *actual.chunked_array());
+      AssertChunkedEquivalent(*expected.chunked_array(), *actual.chunked_array(),
+                              options);
       break;
     default:
       // TODO: Implement better print
@@ -479,7 +485,7 @@ Result<std::optional<std::string>> PrintArrayDiff(const ChunkedArray& expected,
 }
 
 void AssertTablesEqual(const Table& expected, const Table& actual, bool same_chunk_layout,
-                       bool combine_chunks) {
+                       bool combine_chunks, const EqualOptions& options) {
   ASSERT_EQ(expected.num_columns(), actual.num_columns());
 
   if (combine_chunks) {
@@ -487,13 +493,13 @@ void AssertTablesEqual(const Table& expected, const Table& actual, bool same_chu
     ASSERT_OK_AND_ASSIGN(auto new_expected, expected.CombineChunks(pool));
     ASSERT_OK_AND_ASSIGN(auto new_actual, actual.CombineChunks(pool));
 
-    AssertTablesEqual(*new_expected, *new_actual, false, false);
+    AssertTablesEqual(*new_expected, *new_actual, false, false, options);
     return;
   }
 
   if (same_chunk_layout) {
     for (int i = 0; i < actual.num_columns(); ++i) {
-      AssertChunkedEqual(*expected.column(i), *actual.column(i));
+      AssertChunkedEqual(*expected.column(i), *actual.column(i), options);
     }
   } else {
     std::stringstream ss;
@@ -533,17 +539,18 @@ void CompareBatchWith(const RecordBatch& left, const RecordBatch& right,
 }
 
 void CompareBatch(const RecordBatch& left, const RecordBatch& right,
-                  bool compare_metadata) {
+                  bool compare_metadata, const EqualOptions& options) {
   return CompareBatchWith(
       left, right, compare_metadata,
-      [](const Array& left, const Array& right) { return left.Equals(right); });
+      [&](const Array& left, const Array& right) { return left.Equals(right, options); });
 }
 
 void ApproxCompareBatch(const RecordBatch& left, const RecordBatch& right,
-                        bool compare_metadata) {
-  return CompareBatchWith(
-      left, right, compare_metadata,
-      [](const Array& left, const Array& right) { return left.ApproxEquals(right); });
+                        bool compare_metadata, const EqualOptions& options) {
+  return CompareBatchWith(left, right, compare_metadata,
+                          [&](const Array& left, const Array& right) {
+                            return left.ApproxEquals(right, options);
+                          });
 }
 
 std::shared_ptr<Array> TweakValidityBit(const std::shared_ptr<Array>& array,
diff --git a/cpp/src/arrow/testing/gtest_util.h b/cpp/src/arrow/testing/gtest_util.h
index 641aae5a5e2e4..916067d85b753 100644
--- a/cpp/src/arrow/testing/gtest_util.h
+++ b/cpp/src/arrow/testing/gtest_util.h
@@ -221,18 +221,22 @@ ARROW_TESTING_EXPORT void AssertScalarsEqual(
 ARROW_TESTING_EXPORT void AssertScalarsApproxEqual(
     const Scalar& expected, const Scalar& actual, bool verbose = false,
     const EqualOptions& options = TestingEqualOptions());
-ARROW_TESTING_EXPORT void AssertBatchesEqual(const RecordBatch& expected,
-                                             const RecordBatch& actual,
-                                             bool check_metadata = false);
-ARROW_TESTING_EXPORT void AssertBatchesApproxEqual(const RecordBatch& expected,
-                                                   const RecordBatch& actual);
-ARROW_TESTING_EXPORT void AssertChunkedEqual(const ChunkedArray& expected,
-                                             const ChunkedArray& actual);
-ARROW_TESTING_EXPORT void AssertChunkedEqual(const ChunkedArray& actual,
-                                             const ArrayVector& expected);
+ARROW_TESTING_EXPORT void AssertBatchesEqual(
+    const RecordBatch& expected, const RecordBatch& actual, bool check_metadata = false,
+    const EqualOptions& options = TestingEqualOptions());
+ARROW_TESTING_EXPORT void AssertBatchesApproxEqual(
+    const RecordBatch& expected, const RecordBatch& actual,
+    const EqualOptions& options = TestingEqualOptions());
+ARROW_TESTING_EXPORT void AssertChunkedEqual(
+    const ChunkedArray& expected, const ChunkedArray& actual,
+    const EqualOptions& options = TestingEqualOptions());
+ARROW_TESTING_EXPORT void AssertChunkedEqual(
+    const ChunkedArray& actual, const ArrayVector& expected,
+    const EqualOptions& options = TestingEqualOptions());
 // Like ChunkedEqual, but permits different chunk layout
-ARROW_TESTING_EXPORT void AssertChunkedEquivalent(const ChunkedArray& expected,
-                                                  const ChunkedArray& actual);
+ARROW_TESTING_EXPORT void AssertChunkedEquivalent(
+    const ChunkedArray& expected, const ChunkedArray& actual,
+    const EqualOptions& options = TestingEqualOptions());
 ARROW_TESTING_EXPORT void AssertChunkedApproxEquivalent(
     const ChunkedArray& expected, const ChunkedArray& actual,
     const EqualOptions& options = TestingEqualOptions());
@@ -277,12 +281,13 @@ ARROW_TESTING_EXPORT void AssertSchemaNotEqual(const std::shared_ptr<Schema>& lh
 ARROW_TESTING_EXPORT Result<std::optional<std::string>> PrintArrayDiff(
     const ChunkedArray& expected, const ChunkedArray& actual);
 
-ARROW_TESTING_EXPORT void AssertTablesEqual(const Table& expected, const Table& actual,
-                                            bool same_chunk_layout = true,
-                                            bool flatten = false);
+ARROW_TESTING_EXPORT void AssertTablesEqual(
+    const Table& expected, const Table& actual, bool same_chunk_layout = true,
+    bool flatten = false, const EqualOptions& options = TestingEqualOptions());
 
-ARROW_TESTING_EXPORT void AssertDatumsEqual(const Datum& expected, const Datum& actual,
-                                            bool verbose = false);
+ARROW_TESTING_EXPORT void AssertDatumsEqual(
+    const Datum& expected, const Datum& actual, bool verbose = false,
+    const EqualOptions& options = TestingEqualOptions());
 ARROW_TESTING_EXPORT void AssertDatumsApproxEqual(
     const Datum& expected, const Datum& actual, bool verbose = false,
     const EqualOptions& options = TestingEqualOptions());
@@ -296,12 +301,13 @@ void AssertNumericDataEqual(const C_TYPE* raw_data,
   }
 }
 
-ARROW_TESTING_EXPORT void CompareBatch(const RecordBatch& left, const RecordBatch& right,
-                                       bool compare_metadata = true);
+ARROW_TESTING_EXPORT void CompareBatch(
+    const RecordBatch& left, const RecordBatch& right, bool compare_metadata = true,
+    const EqualOptions& options = TestingEqualOptions());
 
-ARROW_TESTING_EXPORT void ApproxCompareBatch(const RecordBatch& left,
-                                             const RecordBatch& right,
-                                             bool compare_metadata = true);
+ARROW_TESTING_EXPORT void ApproxCompareBatch(
+    const RecordBatch& left, const RecordBatch& right, bool compare_metadata = true,
+    const EqualOptions& options = TestingEqualOptions());
 
 // Check if the padding of the buffers of the array is zero.
 // Also cause valgrind warnings if the padding bytes are uninitialized.
diff --git a/cpp/src/arrow/testing/gtest_util_test.cc b/cpp/src/arrow/testing/gtest_util_test.cc
new file mode 100644
index 0000000000000..14c17a972aa06
--- /dev/null
+++ b/cpp/src/arrow/testing/gtest_util_test.cc
@@ -0,0 +1,137 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include "arrow/array.h"
+#include "arrow/array/builder_decimal.h"
+#include "arrow/datum.h"
+#include "arrow/record_batch.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/testing/random.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/checked_cast.h"
+
+namespace arrow {
+
+// Test basic cases for contains NaN.
+class TestAssertContainsNaN : public ::testing::Test {};
+
+TEST_F(TestAssertContainsNaN, BatchesEqual) {
+  auto schema = ::arrow::schema({
+      {field("a", float32())},
+      {field("b", float64())},
+  });
+
+  auto expected = RecordBatchFromJSON(schema,
+                                      R"([{"a": 3,    "b": 5},
+                                       {"a": 1,    "b": 3},
+                                       {"a": 3,    "b": 4},
+                                       {"a": NaN,  "b": 6},
+                                       {"a": 2,    "b": 5},
+                                       {"a": 1,    "b": NaN},
+                                       {"a": 1,    "b": 3}
+                                       ])");
+  auto actual = RecordBatchFromJSON(schema,
+                                    R"([{"a": 3,    "b": 5},
+                                       {"a": 1,    "b": 3},
+                                       {"a": 3,    "b": 4},
+                                       {"a": NaN,  "b": 6},
+                                       {"a": 2,    "b": 5},
+                                       {"a": 1,    "b": NaN},
+                                       {"a": 1,    "b": 3}
+                                       ])");
+  ASSERT_BATCHES_EQUAL(*expected, *actual);
+  AssertBatchesApproxEqual(*expected, *actual);
+}
+
+TEST_F(TestAssertContainsNaN, TableEqual) {
+  auto schema = ::arrow::schema({
+      {field("a", float32())},
+      {field("b", float64())},
+  });
+
+  auto expected = TableFromJSON(schema, {R"([{"a": null, "b": 5},
+                                     {"a": NaN,    "b": 3},
+                                     {"a": 3,    "b": null}
+                                    ])",
+                                         R"([{"a": null, "b": null},
+                                     {"a": 2,    "b": NaN},
+                                     {"a": 1,    "b": 5},
+                                     {"a": 3,    "b": 5}
+                                    ])"});
+  auto actual = TableFromJSON(schema, {R"([{"a": null, "b": 5},
+                                     {"a": NaN,    "b": 3},
+                                     {"a": 3,    "b": null}
+                                    ])",
+                                       R"([{"a": null, "b": null},
+                                     {"a": 2,    "b": NaN},
+                                     {"a": 1,    "b": 5},
+                                     {"a": 3,    "b": 5}
+                                    ])"});
+  ASSERT_TABLES_EQUAL(*expected, *actual);
+}
+
+TEST_F(TestAssertContainsNaN, ArrayEqual) {
+  auto expected = ArrayFromJSON(float64(), "[0, 1, 2, NaN]");
+  auto actual = ArrayFromJSON(float64(), "[0, 1, 2, NaN]");
+  AssertArraysEqual(*expected, *actual);
+}
+
+TEST_F(TestAssertContainsNaN, ChunkedEqual) {
+  auto expected = ChunkedArrayFromJSON(float64(), {
+                                                      "[null, 1]",
+                                                      "[3, NaN, 2]",
+                                                      "[NaN]",
+                                                  });
+
+  auto actual = ChunkedArrayFromJSON(float64(), {
+                                                    "[null, 1]",
+                                                    "[3, NaN, 2]",
+                                                    "[NaN]",
+                                                });
+  AssertChunkedEqual(*expected, *actual);
+}
+
+TEST_F(TestAssertContainsNaN, DatumEqual) {
+  // scalar
+  auto expected_scalar = ScalarFromJSON(float64(), "NaN");
+  auto actual_scalar = ScalarFromJSON(float64(), "NaN");
+  AssertDatumsEqual(expected_scalar, actual_scalar);
+
+  // array
+  auto expected_array = ArrayFromJSON(float64(), "[3, NaN, 2, 1, 5]");
+  auto actual_array = ArrayFromJSON(float64(), "[3, NaN, 2, 1, 5]");
+  AssertDatumsEqual(expected_array, actual_array);
+
+  // chunked array
+  auto expected_chunked = ChunkedArrayFromJSON(float64(), {
+                                                              "[null, 1]",
+                                                              "[3, NaN, 2]",
+                                                              "[NaN]",
+                                                          });
+
+  auto actual_chunked = ChunkedArrayFromJSON(float64(), {
+                                                            "[null, 1]",
+                                                            "[3, NaN, 2]",
+                                                            "[NaN]",
+                                                        });
+  AssertDatumsEqual(expected_chunked, actual_chunked);
+}
+
+}  // namespace arrow

From 62cf42b5074a851a59438ec43177684ed1aac660 Mon Sep 17 00:00:00 2001
From: Anja Kefala <anja@voltrondata.com>
Date: Thu, 16 Nov 2023 05:57:12 -0500
Subject: [PATCH 18/23] MINOR: [Docs] document Float16 support in C++ (#38494)

### Rationale for this change

There is at least partial half-float (Float16) support in Arrow C++. A reason not to merge this is that there is still quite a bit of casting work to be completed:
* https://github.com/apache/arrow/issues/20213
* https://github.com/apache/arrow/issues/32802

Authored-by: anjakefala <anja@voltrondata.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 docs/source/status.rst | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/docs/source/status.rst b/docs/source/status.rst
index c059ab3cef971..fee9a27b6ca1a 100644
--- a/docs/source/status.rst
+++ b/docs/source/status.rst
@@ -40,7 +40,7 @@ Data Types
 +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
 | UInt8/16/32/64    | ✓     | ✓     | ✓     | ✓          |  ✓    |  ✓    | ✓     | ✓     |
 +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
-| Float16           |       |       | ✓     | ✓          |  ✓ (1)|  ✓    | ✓     |       |
+| Float16           | ✓ (1) |       | ✓     | ✓          |  ✓ (2)|  ✓    | ✓     |       |
 +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
 | Float32/64        | ✓     | ✓     | ✓     | ✓          |  ✓    |  ✓    | ✓     | ✓     |
 +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
@@ -96,7 +96,7 @@ Data Types
 | Data type         | C++   | Java  | Go    | JavaScript | C#    | Rust  | Julia | Swift |
 | (special)         |       |       |       |            |       |       |       |       |
 +===================+=======+=======+=======+============+=======+=======+=======+=======+
-| Dictionary        | ✓     | ✓ (2) | ✓     | ✓          | ✓ (2) | ✓ (2) | ✓     |       |
+| Dictionary        | ✓     | ✓ (3) | ✓     | ✓          | ✓ (3) | ✓ (3) | ✓     |       |
 +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
 | Extension         | ✓     | ✓     | ✓     |            |       | ✓     | ✓     |       |
 +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
@@ -105,8 +105,9 @@ Data Types
 
 Notes:
 
-* \(1) Float16 support in C# is only available when targeting .NET 6+.
-* \(2) Nested dictionaries not supported
+* \(1) Casting to/from Float16 in C++ is not supported.
+* \(2) Float16 support in C# is only available when targeting .NET 6+.
+* \(3) Nested dictionaries not supported
 
 .. seealso::
    The :ref:`format_columnar` specification.

From 3e0ca5b7fd7c0fbe0dbec6c4bf5652a177baefad Mon Sep 17 00:00:00 2001
From: Quang Hoang <quanghgx@gmail.com>
Date: Thu, 16 Nov 2023 19:36:09 +0700
Subject: [PATCH 19/23] GH-37969: [C++][Parquet] add more closed file checks
 for ParquetFileWriter (#38390)

### Rationale for this change
Operations on a closed ParquetFileWriter are not allowed, but they should not segfault. Somehow, ParquetFileWriter::Close() also resets its pimpl, so after that, any operation that needs this pointer will lead to a segfault.

### What changes are included in this PR?
Adding more checks for closed file.

### Are these changes tested?
Yes.

### Are there any user-facing changes?
No.

* Closes: #37969

Authored-by: Quang Hoang <quanghgx@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 .../parquet/arrow/arrow_reader_writer_test.cc | 27 +++++++++++++++++++
 cpp/src/parquet/arrow/writer.cc               | 12 +++++++++
 cpp/src/parquet/file_writer.cc                |  6 ++++-
 3 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
index fb9e53870583c..a314ecbf747e7 100644
--- a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
+++ b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
@@ -5221,6 +5221,33 @@ TEST(TestArrowReadWrite, FuzzReader) {
   }
 }
 
+// Test writing table with a closed writer, should not segfault (GH-37969).
+TEST(TestArrowReadWrite, OperationsOnClosedWriter) {
+  // A sample table, type and structure does not matter in this test case
+  auto schema = ::arrow::schema({::arrow::field("letter", ::arrow::utf8())});
+  auto table = ::arrow::Table::Make(
+      schema, {::arrow::ArrayFromJSON(::arrow::utf8(), R"(["a", "b", "c"])")});
+
+  auto sink = CreateOutputStream();
+  ASSERT_OK_AND_ASSIGN(auto writer, parquet::arrow::FileWriter::Open(
+                                        *schema, ::arrow::default_memory_pool(), sink,
+                                        parquet::default_writer_properties(),
+                                        parquet::default_arrow_writer_properties()));
+
+  // Should be ok
+  ASSERT_OK(writer->WriteTable(*table, 1));
+
+  // Operations on closed writer are invalid
+  ASSERT_OK(writer->Close());
+
+  ASSERT_RAISES(Invalid, writer->NewRowGroup(1));
+  ASSERT_RAISES(Invalid, writer->WriteColumnChunk(table->column(0), 0, 1));
+  ASSERT_RAISES(Invalid, writer->NewBufferedRowGroup());
+  ASSERT_OK_AND_ASSIGN(auto record_batch, table->CombineChunksToBatch());
+  ASSERT_RAISES(Invalid, writer->WriteRecordBatch(*record_batch));
+  ASSERT_RAISES(Invalid, writer->WriteTable(*table, 1));
+}
+
 namespace {
 
 struct ColumnIndexObject {
diff --git a/cpp/src/parquet/arrow/writer.cc b/cpp/src/parquet/arrow/writer.cc
index 0c67e8d6bb3d4..300a6d8e054cc 100644
--- a/cpp/src/parquet/arrow/writer.cc
+++ b/cpp/src/parquet/arrow/writer.cc
@@ -306,6 +306,7 @@ class FileWriterImpl : public FileWriter {
   }
 
   Status NewRowGroup(int64_t chunk_size) override {
+    RETURN_NOT_OK(CheckClosed());
     if (row_group_writer_ != nullptr) {
       PARQUET_CATCH_NOT_OK(row_group_writer_->Close());
     }
@@ -325,6 +326,13 @@ class FileWriterImpl : public FileWriter {
     return Status::OK();
   }
 
+  Status CheckClosed() const {
+    if (closed_) {
+      return Status::Invalid("Operation on closed file");
+    }
+    return Status::OK();
+  }
+
   Status WriteColumnChunk(const Array& data) override {
     // A bit awkward here since cannot instantiate ChunkedArray from const Array&
     auto chunk = ::arrow::MakeArray(data.data());
@@ -334,6 +342,7 @@ class FileWriterImpl : public FileWriter {
 
   Status WriteColumnChunk(const std::shared_ptr<ChunkedArray>& data, int64_t offset,
                           int64_t size) override {
+    RETURN_NOT_OK(CheckClosed());
     if (arrow_properties_->engine_version() == ArrowWriterProperties::V2 ||
         arrow_properties_->engine_version() == ArrowWriterProperties::V1) {
       if (row_group_writer_->buffered()) {
@@ -356,6 +365,7 @@ class FileWriterImpl : public FileWriter {
   std::shared_ptr<::arrow::Schema> schema() const override { return schema_; }
 
   Status WriteTable(const Table& table, int64_t chunk_size) override {
+    RETURN_NOT_OK(CheckClosed());
     RETURN_NOT_OK(table.Validate());
 
     if (chunk_size <= 0 && table.num_rows() > 0) {
@@ -392,6 +402,7 @@ class FileWriterImpl : public FileWriter {
   }
 
   Status NewBufferedRowGroup() override {
+    RETURN_NOT_OK(CheckClosed());
     if (row_group_writer_ != nullptr) {
       PARQUET_CATCH_NOT_OK(row_group_writer_->Close());
     }
@@ -400,6 +411,7 @@ class FileWriterImpl : public FileWriter {
   }
 
   Status WriteRecordBatch(const RecordBatch& batch) override {
+    RETURN_NOT_OK(CheckClosed());
     if (batch.num_rows() == 0) {
       return Status::OK();
     }
diff --git a/cpp/src/parquet/file_writer.cc b/cpp/src/parquet/file_writer.cc
index 9a92d4525d23d..5502e1f94a9d0 100644
--- a/cpp/src/parquet/file_writer.cc
+++ b/cpp/src/parquet/file_writer.cc
@@ -656,7 +656,11 @@ void ParquetFileWriter::AddKeyValueMetadata(
 }
 
 const std::shared_ptr<WriterProperties>& ParquetFileWriter::properties() const {
-  return contents_->properties();
+  if (contents_) {
+    return contents_->properties();
+  } else {
+    throw ParquetException("Cannot get properties from closed file");
+  }
 }
 
 }  // namespace parquet

From 5583dbeca4f36d6eadba979c619c4a07dbb2095f Mon Sep 17 00:00:00 2001
From: James Duong <james.duong@improving.com>
Date: Thu, 16 Nov 2023 06:20:12 -0800
Subject: [PATCH 20/23] GH-38737: [Java] Fix JDBC caching of SqlInfo values
 (#38739)

### Rationale for this change
The cache of SqlInfo properties that ArrowDatabaseMetaData maintains isn't populated in a thread-safe way. This can cause JDBC applications trying to retrieve several properties from DatabaseMetaData to encounter missing properties when they shouldn't.

### What changes are included in this PR?
- Changed the checking for the cache being populated to be based on an AtomicBoolean marking that the cache is fully populated, rather than just checking if the cache is empty.
- Avoid having multiple threads call getSqlInfo() if they see that the cache is empty concurrently.

### Are these changes tested?
Verified existing unit tests.

### Are there any user-facing changes?
No.
* Closes: #38737

Authored-by: James Duong <james.duong@improving.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
---
 .../arrow/driver/jdbc/ArrowDatabaseMetadata.java | 16 +++++++++++-----
 .../driver/jdbc/utils/MockFlightSqlProducer.java | 10 +++++-----
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowDatabaseMetadata.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowDatabaseMetadata.java
index da2b0b00edaef..3487e58a64678 100644
--- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowDatabaseMetadata.java
+++ b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowDatabaseMetadata.java
@@ -45,11 +45,11 @@
 import java.sql.ResultSet;
 import java.sql.SQLException;
 import java.util.Arrays;
-import java.util.Collections;
 import java.util.EnumMap;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;
 
@@ -145,8 +145,8 @@ public class ArrowDatabaseMetadata extends AvaticaDatabaseMetaData {
           Field.notNullable("IS_AUTOINCREMENT", Types.MinorType.VARCHAR.getType()),
           Field.notNullable("IS_GENERATEDCOLUMN", Types.MinorType.VARCHAR.getType())
       ));
-  private final Map<SqlInfo, Object> cachedSqlInfo =
-      Collections.synchronizedMap(new EnumMap<>(SqlInfo.class));
+  private final AtomicBoolean isCachePopulated = new AtomicBoolean(false);
+  private final Map<SqlInfo, Object> cachedSqlInfo = new EnumMap<>(SqlInfo.class);
   private static final Map<Integer, Integer> sqlTypesToFlightEnumConvertTypes = new HashMap<>();
 
   static {
@@ -729,10 +729,15 @@ private <T> T getSqlInfoAndCacheIfCacheIsEmpty(final SqlInfo sqlInfoCommand,
                                                  final Class<T> desiredType)
       throws SQLException {
     final ArrowFlightConnection connection = getConnection();
-    if (cachedSqlInfo.isEmpty()) {
-      final FlightInfo sqlInfo = connection.getClientHandler().getSqlInfo();
+    if (!isCachePopulated.get()) {
+      // Lock-and-populate the cache. Only issue the call to getSqlInfo() once,
+      // populate the cache, then mark it as populated.
+      // Note that multiple callers from separate threads can see that the cache is not populated, but only
+      // one thread will try to populate the cache. Other threads will see the cache is non-empty when acquiring
+      // the lock on the cache and skip population.
       synchronized (cachedSqlInfo) {
         if (cachedSqlInfo.isEmpty()) {
+          final FlightInfo sqlInfo = connection.getClientHandler().getSqlInfo();
           try (final ResultSet resultSet =
                    ArrowFlightJdbcFlightStreamResultSet.fromFlightInfo(
                        connection, sqlInfo, null)) {
@@ -741,6 +746,7 @@ private <T> T getSqlInfoAndCacheIfCacheIsEmpty(final SqlInfo sqlInfoCommand,
                   resultSet.getObject("value"));
             }
           }
+          isCachePopulated.set(true);
         }
       }
     }
diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/MockFlightSqlProducer.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/MockFlightSqlProducer.java
index 75a7396931c8e..2b65f8f5a07ba 100644
--- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/MockFlightSqlProducer.java
+++ b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/MockFlightSqlProducer.java
@@ -105,8 +105,8 @@ public final class MockFlightSqlProducer implements FlightSqlProducer {
 
   private final Map<String, Integer> actionTypeCounter = new HashMap<>();
 
-  private static FlightInfo getFightInfoExportedAndImportedKeys(final Message message,
-                                                                final FlightDescriptor descriptor) {
+  private static FlightInfo getFlightInfoExportedAndImportedKeys(final Message message,
+                                                                 final FlightDescriptor descriptor) {
     return getFlightInfo(message, Schemas.GET_IMPORTED_KEYS_SCHEMA, descriptor);
   }
 
@@ -529,14 +529,14 @@ public void getStreamPrimaryKeys(final CommandGetPrimaryKeys commandGetPrimaryKe
   public FlightInfo getFlightInfoExportedKeys(final CommandGetExportedKeys commandGetExportedKeys,
                                               final CallContext callContext,
                                               final FlightDescriptor flightDescriptor) {
-    return getFightInfoExportedAndImportedKeys(commandGetExportedKeys, flightDescriptor);
+    return getFlightInfoExportedAndImportedKeys(commandGetExportedKeys, flightDescriptor);
   }
 
   @Override
   public FlightInfo getFlightInfoImportedKeys(final CommandGetImportedKeys commandGetImportedKeys,
                                               final CallContext callContext,
                                               final FlightDescriptor flightDescriptor) {
-    return getFightInfoExportedAndImportedKeys(commandGetImportedKeys, flightDescriptor);
+    return getFlightInfoExportedAndImportedKeys(commandGetImportedKeys, flightDescriptor);
   }
 
   @Override
@@ -544,7 +544,7 @@ public FlightInfo getFlightInfoCrossReference(
       final CommandGetCrossReference commandGetCrossReference,
       final CallContext callContext,
       final FlightDescriptor flightDescriptor) {
-    return getFightInfoExportedAndImportedKeys(commandGetCrossReference, flightDescriptor);
+    return getFlightInfoExportedAndImportedKeys(commandGetCrossReference, flightDescriptor);
   }
 
   @Override

From 971bf381508c3633781f872a6cb3a4e8cc2a1f81 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Thu, 16 Nov 2023 15:59:42 +0100
Subject: [PATCH 21/23] GH-38745: [Integration] Fix huge integration test
 (#38746)

### Rationale for this change

For some reason, the decimal files generated by the integration test suite have as many record batches as columns.
This looks like a programming error rather than something deliberate.

### What changes are included in this PR?

Use batch sizes similar to those of the other test files.

### Are these changes tested?

Yes.

### Are there any user-facing changes?

No.

* Closes: #38745

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 dev/archery/archery/integration/datagen.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/dev/archery/archery/integration/datagen.py b/dev/archery/archery/integration/datagen.py
index 1ce2775c160b8..ff10c0bb03fb6 100644
--- a/dev/archery/archery/integration/datagen.py
+++ b/dev/archery/archery/integration/datagen.py
@@ -1520,8 +1520,7 @@ def generate_decimal128_case():
         for i, precision in enumerate(range(3, 39))
     ]
 
-    possible_batch_sizes = 7, 10
-    batch_sizes = [possible_batch_sizes[i % 2] for i in range(len(fields))]
+    batch_sizes = [7, 10]
     # 'decimal' is the original name for the test, and it must match
     # provide "gold" files that test backwards compatibility, so they
     # can be appropriately skipped.
@@ -1535,8 +1534,7 @@ def generate_decimal256_case():
         for i, precision in enumerate(range(37, 70))
     ]
 
-    possible_batch_sizes = 7, 10
-    batch_sizes = [possible_batch_sizes[i % 2] for i in range(len(fields))]
+    batch_sizes = [7, 10]
     return _generate_file('decimal256', fields, batch_sizes)
 
 

From 1fd11d33cb56fd7eff4dce05edaba1c9d8a1dccd Mon Sep 17 00:00:00 2001
From: Lei Hou <leihou@mathworks.com>
Date: Thu, 16 Nov 2023 12:53:02 -0500
Subject: [PATCH 22/23] GH-38652: [MATLAB] Add tests about time precision
 preservation when converting MATLAB duration to `arrow.array.Time32Array` and
 `arrow.array.Time64Array` (#38661)

### Rationale for this change

The current conversion from MATLAB duration to `arrow.array.Time32Array` and `arrow.array.Time64Array` loses time precision, and there is no test covering this limitation. It is best practice to have tests that cover the software design. In addition, such tests will be helpful for evaluating the impact when we improve the design in the future.

### What changes are included in this PR?

I mainly added three test cases for each of `arrow.array.Time32Array` and `arrow.array.Time64Array`.

- Updated the basic test case to verify both class and value. In the MATLAB interface tests, we would like to verify the value to make sure there is no precision loss. The basic test case will serve as a test example when people learn to write tests. Updating the basic test case will set a good example for contributors to learn.
- Test the default value of "TimeUnit".
- Test the functionality of "TimeUnit".

### Are these changes tested?

No software change. The updated test files passed on my local machine.

### Are there any user-facing changes?

No
* Closes: #38652

Authored-by: Lei Hou <leihou@mathworks.com>
Signed-off-by: Kevin Gurney <kgurney@mathworks.com>
---
 matlab/test/arrow/array/tTime32Array.m | 33 +++++++++++
 matlab/test/arrow/array/tTime64Array.m | 76 ++++++++++++++++++++++++++
 2 files changed, 109 insertions(+)

diff --git a/matlab/test/arrow/array/tTime32Array.m b/matlab/test/arrow/array/tTime32Array.m
index cc2fad64b2a28..24c3508a86015 100644
--- a/matlab/test/arrow/array/tTime32Array.m
+++ b/matlab/test/arrow/array/tTime32Array.m
@@ -30,6 +30,15 @@ function Basic(tc)
             times = seconds(1:4);
             array = tc.ArrowArrayConstructorFcn(times);
             tc.verifyInstanceOf(array, "arrow.array.Time32Array");
+            tc.verifyEqual(array.toMATLAB, times');
+        end
+
+        function TimeUnitDefaultValue(tc)
+            % Verify that the default value of "TimeUnit" is "second".
+            times = seconds([1.2 1.3 1.4 1.5 1.7]);
+            array = tc.ArrowArrayConstructorFcn(times);
+            tc.verifyEqual(array.Type.TimeUnit, arrow.type.TimeUnit.Second);
+            tc.verifyEqual(array.toMATLAB, seconds([1;1;1;2;2]));
         end
 
         function TypeIsTime32(tc)
@@ -274,6 +283,30 @@ function TestIsEqualFalseTimeUnitMistmatch(tc)
             % arrays are not equal
             tc.verifyFalse(isequal(array1, array2));
         end
+
+        function RoundTimeBySpecifiedTimeUnit(tc)
+            % Verify that the input parameter "TimeUnit" is used to specify
+            % the time resolution. The value is rounded off based on the
+            % specified "TimeUnit".
+
+            % TimeUnit="Second"
+            matlabTimes = seconds([1.1, 1.4, 1.5, 1.9, 2.001]);
+            arrowTimes = tc.ArrowArrayConstructorFcn(matlabTimes, TimeUnit="Second");
+            tc.verifyEqual(arrowTimes.toMATLAB(),seconds([1, 1, 2, 2, 2])');
+
+            % TimeUnit="Millisecond"
+            matlabTimes = seconds([1.1, 1.99, 1.001, 1.0004, 1.0005, 2.001]);
+            arrowTimes = tc.ArrowArrayConstructorFcn(matlabTimes, TimeUnit="Millisecond");
+            tc.verifyEqual(arrowTimes.toMATLAB(),seconds([1.1, 1.99, 1.001, 1, 1.001, 2.001])','AbsTol',seconds(1e-15));
+        end
+
+        function TimeUnitIsReadOnly(tc)
+            % Verify that arrowArray.Type.TimeUnit cannot be changed.
+
+            matlabTimes = seconds([1.1, 1.4, 1.5, 1.9, 2.001]);
+            arrowArray = tc.ArrowArrayConstructorFcn(matlabTimes);
+            tc.verifyError(@()setfield(arrowArray.Type,"TimeUnit", "millisecond"),'MATLAB:class:SetProhibited');
+        end
     end
 
     methods
diff --git a/matlab/test/arrow/array/tTime64Array.m b/matlab/test/arrow/array/tTime64Array.m
index a078c5e2173f3..3f66ebd638c65 100644
--- a/matlab/test/arrow/array/tTime64Array.m
+++ b/matlab/test/arrow/array/tTime64Array.m
@@ -30,6 +30,26 @@ function Basic(tc)
             times = seconds(1:4);
             array = tc.ArrowArrayConstructorFcn(times);
             tc.verifyInstanceOf(array, "arrow.array.Time64Array");
+            tc.verifyEqual(array.toMATLAB, times');
+        end
+
+        function TimeUnitDefaultValue(tc)
+            % Verify that the default value of "TimeUnit" is "Microsecond".
+            matlabTimes = seconds([1; ...
+                                   0.001; ...
+                                   2.004521; ...
+                                   3.1234564; ...
+                                   4.1234566; ...
+                                   5.000000123]);
+            arrowArray = tc.ArrowArrayConstructorFcn(matlabTimes);
+            tc.verifyEqual(arrowArray.Type.TimeUnit, arrow.type.TimeUnit.Microsecond);
+            tc.verifyEqual(arrowArray.toMATLAB(), ...
+                           seconds([1;...
+                                    0.001; ...
+                                    2.004521; ...
+                                    3.123456; ...
+                                    4.123457; ...
+                                    5]));
         end
 
         function TypeIsTime64(tc)
@@ -290,6 +310,62 @@ function TestIsEqualFalseTimeUnitMistmatch(tc)
             % arrays are not equal
             tc.verifyFalse(isequal(array1, array2));
         end
+
+        function RoundTimeBySpecifiedTimeUnit(tc)
+            % Verify that the input parameter "TimeUnit" is used to specify
+            % the time resolution. The value is rounded off based on the
+            % specified "TimeUnit".
+
+            % TimeUnit="Microsecond"
+            matlabTimes = seconds([1.000001, ...
+                                   2.999999, ...
+                                   0.0002004, ...
+                                   0.0000035, ...
+                                   10.123456499, ...
+                                   9.999999543]);
+            arrowTimes = tc.ArrowArrayConstructorFcn(matlabTimes, TimeUnit="Microsecond");
+            tc.verifyEqual(arrowTimes.toMATLAB(), ...
+                           seconds([1.000001, ...
+                                    2.999999, ...
+                                    0.0002, ...
+                                    0.000004, ...
+                                    10.123456, ...
+                                    10])', ...
+                          'AbsTol',seconds(1e-14));
+
+            % TimeUnit="Nanosecond"
+            matlabTimes = seconds([1, ...
+                                   1.123, ...
+                                   1.12345, ...
+                                   1.123456, ...
+                                   1.1234567, ...
+                                   1.12345678, ...
+                                   1.123456789, ...
+                                   1.1234567894, ...
+                                   1.1234567895, ...
+                                   1.123456789009]);
+            arrowTimes = tc.ArrowArrayConstructorFcn(matlabTimes, TimeUnit="Nanosecond");
+            tc.verifyEqual(arrowTimes.toMATLAB(),...
+                           seconds([1, ...
+                                    1.123, ...
+                                    1.12345, ...
+                                    1.123456, ...
+                                    1.1234567, ...
+                                    1.12345678, ...
+                                    1.123456789, ...
+                                    1.123456789, ...
+                                    1.123456790, ...
+                                    1.123456789])',...
+                          'AbsTol',seconds(1e-15));
+        end
+
+        function TimeUnitIsReadOnly(tc)
+            % Verify that arrowArray.Type.TimeUnit cannot be changed.
+
+            matlabTimes = seconds([1.000001, 2.999999, 0.0002004]);
+            arrowArray = tc.ArrowArrayConstructorFcn(matlabTimes);
+            tc.verifyError(@()setfield(arrowArray.Type,"TimeUnit", "Nanosecond"),'MATLAB:class:SetProhibited');
+        end
     end
 
     methods

From e543ee69c96062dcd6afb239b5b9c53b178d357c Mon Sep 17 00:00:00 2001
From: Jonathan Keane <jkeane@gmail.com>
Date: Thu, 16 Nov 2023 18:57:24 -0600
Subject: [PATCH 23/23] GH-38752: [R] Wrap rosetta detection in tryCatch
 (#38754)

### Rationale for this change

We should never allow Rosetta checking to cause an error.

### What changes are included in this PR?

~Wrap rosetta checking in a tryCatch~ Our use of `try()` wasn't doing what we thought; it actually needs to have `silent = TRUE` specified to _not_ error.

### Are these changes tested?

I tested them locally by changing the system call to a mangled command that doesn't exist, observing the error on load, and then wrapping the call in `tryCatch`. We might consider adding a test in CI, though something like that would add considerable complexity.

### Are there any user-facing changes?

No, though we will need to pull it into any point release
* Closes: #38752

Authored-by: Jonathan Keane <jkeane@gmail.com>
Signed-off-by: Jacob Wujciak-Jens <jacob@wujciak.de>
---
 r/R/arrow-package.R |  2 +-
 r/R/install-arrow.R | 10 ++++++++--
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R
index eec95b8282bfd..1f39a50744abc 100644
--- a/r/R/arrow-package.R
+++ b/r/R/arrow-package.R
@@ -212,7 +212,7 @@ configure_tzdb <- function() {
         )
       )
     }
-  })
+  }, silent = TRUE)
 }
 
 # Clean up the StopSource that was registered in .onLoad() so that if the
diff --git a/r/R/install-arrow.R b/r/R/install-arrow.R
index 6db6f2b0adaa3..88eb61a5dae76 100644
--- a/r/R/install-arrow.R
+++ b/r/R/install-arrow.R
@@ -271,6 +271,12 @@ wslify_path <- function(path) {
 
 on_rosetta <- function() {
   # make sure to suppress warnings and ignore the stderr so that this is silent where proc_translated doesn't exist
-  identical(tolower(Sys.info()[["sysname"]]), "darwin") &&
-    identical(suppressWarnings(system("sysctl -n sysctl.proc_translated", intern = TRUE, ignore.stderr = TRUE)), "1")
+  sysctl_out <- tryCatch(
+    suppressWarnings(system("sysctl -n sysctl.proc_translated", intern = TRUE, ignore.stderr = TRUE)),
+    error = function(e) {
+      # If this has errored, we assume that this is not on rosetta
+      return("0")
+    }
+  )
+  identical(tolower(Sys.info()[["sysname"]]), "darwin") && identical(sysctl_out, "1")
 }