diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9ee356d4..8b25e440 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -189,11 +189,3 @@ jobs: RUST_BACKTRACE: 1 CARGO_INCREMENTAL: 0 UNITTEST_LOG_DIR: "__unittest_logs" - - name: Codecov upload - uses: codecov/codecov-action@v2 - with: - token: ${{ secrets.CODECOV_TOKEN }} - files: ./lcov.info - flags: rust - fail_ci_if_error: false - verbose: true diff --git a/Cargo.toml b/Cargo.toml index 5fe3276f..5b6acf0c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,7 @@ # under the License. [package] -name = "orc-rust" +name = "datafusion-orc" version = "0.4.1" edition = "2021" homepage = "https://github.com/datafusion-contrib/datafusion-orc" @@ -32,26 +32,17 @@ rust-version = "1.73" all-features = true [dependencies] -arrow = { version = "52", features = ["prettyprint", "chrono-tz"] } -bytemuck = { version = "1.18.0", features = ["must_cast"] } +arrow = { version = "53", features = ["prettyprint", "chrono-tz"] } +async-trait = { version = "0.1.77" } bytes = "1.4" -chrono = { version = "0.4.37", default-features = false, features = ["std"] } -chrono-tz = "0.9" -fallible-streaming-iterator = { version = "0.1" } -flate2 = "1" -lz4_flex = "0.11" -lzokay-native = "0.1" -num = "0.4.1" -prost = { version = "0.12" } -snafu = "0.8" -snap = "1.1" -zstd = "0.12" - -# async support -async-trait = { version = "0.1.77", optional = true } -futures = { version = "0.3", optional = true, default-features = false, features = ["std"] } -futures-util = { version = "0.3", optional = true } -tokio = { version = "1.28", optional = true, features = [ +datafusion = { version = "42.0" } +datafusion-expr = { version = "42.0" } +datafusion-physical-expr = { version = "42.0" } +futures = { version = "0.3", default-features = false, features = ["std"] } +futures-util = { version = "0.3" } +object_store = { version = "0.11" } +orc-rust = { version = "0.5", features = ["async"] } +tokio = { version = "1.28", features = [ "io-util", "sync", "fs", @@ -60,61 +51,16 @@ tokio = { version = "1.28", optional = true, features = [ "rt-multi-thread", ] } -# cli -anyhow = { version = "1.0", optional = true } -clap = { version = "4.5.4", features = ["derive"], optional = true } - -# opendal -opendal = { version = "0.48", optional = true, default-features = false } - -# datafusion support -datafusion = { version = "39.0.0", optional = true } -datafusion-expr = { version = "39.0.0", optional = true } -datafusion-physical-expr = { version = "39.0.0", optional = true } -object_store = { version = "0.10.1", optional = true } - [dev-dependencies] -arrow-ipc = { version = "52.0.0", features = ["lz4"] } -arrow-json = "52.0.0" +arrow-ipc = { version = "53.0.0", features = ["lz4"] } +arrow-json = "53.0.0" criterion = { version = "0.5", default-features = false, features = ["async_tokio"] } opendal = { version = "0.48", default-features = false, features = ["services-memory"] } pretty_assertions = "1.3.0" proptest = "1.0.0" serde_json = { version = "1.0", default-features = false, features = ["std"] } -[features] -default = ["async"] - -async = ["async-trait", "futures", "futures-util", "tokio"] -cli = ["anyhow", "clap"] -datafusion = ["async", "dep:datafusion", "datafusion-expr", "datafusion-physical-expr", "object_store"] -# Enable opendal support. 
-opendal = ["dep:opendal"] - -[[bench]] -name = "arrow_reader" -harness = false -required-features = ["async"] -# Some issue when publishing and path isn't specified, so adding here -path = "./benches/arrow_reader.rs" - -[profile.bench] -debug = true - [[example]] name = "datafusion_integration" -required-features = ["datafusion"] # Some issue when publishing and path isn't specified, so adding here path = "./examples/datafusion_integration.rs" - -[[bin]] -name = "orc-metadata" -required-features = ["cli"] - -[[bin]] -name = "orc-export" -required-features = ["cli"] - -[[bin]] -name = "orc-stats" -required-features = ["cli"] diff --git a/README.md b/README.md index 6532c32d..e778d156 100644 --- a/README.md +++ b/README.md @@ -1,124 +1,5 @@ [![test](https://github.com/datafusion-contrib/datafusion-orc/actions/workflows/ci.yml/badge.svg)](https://github.com/datafusion-contrib/datafusion-orc/actions/workflows/ci.yml) -[![codecov](https://codecov.io/gh/WenyXu/orc-rs/branch/main/graph/badge.svg?token=2CSHZX02XM)](https://codecov.io/gh/WenyXu/orc-rs) -[![Crates.io](https://img.shields.io/crates/v/orc-rust)](https://crates.io/crates/orc-rust) -[![Crates.io](https://img.shields.io/crates/d/orc-rust)](https://crates.io/crates/orc-rust) -# orc-rust - -A native Rust implementation of the [Apache ORC](https://orc.apache.org) file format, -providing API's to read data into [Apache Arrow](https://arrow.apache.org) in-memory arrays. - -See the [documentation](https://docs.rs/orc-rust/latest/orc_rust/) for examples on how to use this crate. - -## Supported features - -This crate currently only supports reading ORC files into Arrow arrays. Write support is planned -(see [Roadmap](#roadmap)). The below features listed relate only to reading ORC files. -At this time, we aim to support the [ORCv1](https://orc.apache.org/specification/ORCv1/) specification only. - -- Read synchronously & asynchronously (using Tokio) -- All compression types (Zlib, Snappy, Lzo, Lz4, Zstd) -- All ORC data types -- All encodings -- Rudimentary support for retrieving statistics -- Retrieving user metadata into Arrow schema metadata - -## Roadmap - -The long term vision for this crate is to be feature complete enough to be donated to the -[arrow-rs](https://github.com/apache/arrow-rs) project. - -The following lists the rough roadmap for features to be implemented, from highest to lowest priority. - -- Performance enhancements -- DataFusion integration -- Predicate pushdown -- Row indices -- Bloom filters -- Write from Arrow arrays -- Encryption - -A non-Arrow API interface is not planned at the moment. Feel free to raise an issue if there is such -a use case. - -## Version compatibility - -No guarantees are provided about stability across versions. We will endeavour to keep the top level API's -(`ArrowReader` and `ArrowStreamReader`) as stable as we can, but other API's provided may change as we -explore the interface we want the library to expose. - -Versions will be released on an ad-hoc basis (with no fixed schedule). 
- -## Mapping ORC types to Arrow types - -The following table lists how ORC data types are read into Arrow data types: - -| ORC Data Type | Arrow Data Type | Notes | -| ----------------- | -------------------------- | ----- | -| Boolean | Boolean | | -| TinyInt | Int8 | | -| SmallInt | Int16 | | -| Int | Int32 | | -| BigInt | Int64 | | -| Float | Float32 | | -| Double | Float64 | | -| String | Utf8 | | -| Char | Utf8 | | -| VarChar | Utf8 | | -| Binary | Binary | | -| Decimal | Decimal128 | | -| Date | Date32 | | -| Timestamp | Timestamp(Nanosecond, None) | ¹ | -| Timestamp instant | Timestamp(Nanosecond, UTC) | ¹ | -| Struct | Struct | | -| List | List | | -| Map | Map | | -| Union | Union(_, Sparse) | ² | - -¹: `ArrowReaderBuilder::with_schema` allows configuring different time units or decoding to -`Decimal128(38, 9)` (i128 of non-leap nanoseconds since UNIX epoch). -Overflows may happen while decoding to a non-Seconds time unit, and results in `OrcError`. -Loss of precision may happen while decoding to a non-Nanosecond time unit, and results in `OrcError`. -`Decimal128(38, 9)` avoids both overflows and loss of precision. - -²: Currently only supports a maximum of 127 variants - -## Contributing - -All contributions are welcome! Feel free to raise an issue if you have a feature request, bug report, -or a question. Feel free to raise a Pull Request without raising an issue first, as long as the Pull -Request is descriptive enough. - -Some tools we use in addition to the standard `cargo` that require installation are: - -- [taplo](https://taplo.tamasfe.dev/) -- [typos](https://crates.io/crates/typos) - -```shell -cargo install typos-cli -cargo install taplo-cli -``` - -```shell -# Building the crate -cargo build - -# Running the test suite -cargo test - -# Simple benchmarks -cargo bench - -# Formatting TOML files -taplo format - -# Detect any typos in the codebase -typos -``` - -To regenerate/update the [proto.rs](src/proto.rs) file, execute the [regen.sh](regen.sh) script. - -```shell -./regen.sh -``` +# datafusion-orc +Experimental ORC file reader for DataFusion based on [orc-rust](https://crates.io/crates/orc-rust). diff --git a/benches/arrow_reader.rs b/benches/arrow_reader.rs deleted file mode 100644 index 10f28996..00000000 --- a/benches/arrow_reader.rs +++ /dev/null @@ -1,70 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
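The removed README's footnote above mentions `ArrowReaderBuilder::with_schema` for decoding timestamps at a different time unit. A hedged sketch of that usage (the builder method name comes from the README; its exact signature, and the column name, are assumed here):

```rust
use std::{fs::File, sync::Arc};

use arrow::datatypes::{DataType, Field, Schema, TimeUnit};
use orc_rust::arrow_reader::ArrowReaderBuilder;

fn read_as_milliseconds(path: &str) -> Result<(), Box<dyn std::error::Error>> {
    // Request millisecond timestamps instead of the default nanoseconds;
    // per the README above, overflow during decoding surfaces as an OrcError.
    let schema = Arc::new(Schema::new(vec![Field::new(
        "ts", // hypothetical column name
        DataType::Timestamp(TimeUnit::Millisecond, None),
        true,
    )]));
    let reader = ArrowReaderBuilder::try_new(File::open(path)?)?
        .with_schema(schema)
        .build();
    for batch in reader {
        let _ = batch?;
    }
    Ok(())
}
```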
- -use std::fs::File; - -use criterion::{criterion_group, criterion_main, Criterion}; -use futures_util::TryStreamExt; -use orc_rust::arrow_reader::ArrowReaderBuilder; - -fn basic_path(path: &str) -> String { - let dir = env!("CARGO_MANIFEST_DIR"); - format!("{}/tests/basic/data/{}", dir, path) -} - -// demo-12-zlib.orc -// 1,920,800 total rows -// Columns: -// - Int32 -// - Dictionary(UInt64, Utf8) -// - Dictionary(UInt64, Utf8) -// - Dictionary(UInt64, Utf8) -// - Int32 -// - Dictionary(UInt64, Utf8) -// - Int32 -// - Int32 -// - Int32 - -async fn async_read_all() { - let file = "demo-12-zlib.orc"; - let file_path = basic_path(file); - let f = tokio::fs::File::open(file_path).await.unwrap(); - let reader = ArrowReaderBuilder::try_new_async(f) - .await - .unwrap() - .build_async(); - let _ = reader.try_collect::>().await.unwrap(); -} - -fn sync_read_all() { - let file = "demo-12-zlib.orc"; - let file_path = basic_path(file); - let f = File::open(file_path).unwrap(); - let reader = ArrowReaderBuilder::try_new(f).unwrap().build(); - let _ = reader.collect::, _>>().unwrap(); -} - -fn criterion_benchmark(c: &mut Criterion) { - c.bench_function("sync reader", |b| b.iter(sync_read_all)); - c.bench_function("async reader", |b| { - b.to_async(tokio::runtime::Runtime::new().unwrap()) - .iter(async_read_all); - }); -} - -criterion_group!(benches, criterion_benchmark); -criterion_main!(benches); diff --git a/examples/datafusion_integration.rs b/examples/datafusion_integration.rs index c46a5983..78d8fa0c 100644 --- a/examples/datafusion_integration.rs +++ b/examples/datafusion_integration.rs @@ -17,7 +17,7 @@ use datafusion::error::Result; use datafusion::prelude::*; -use orc_rust::datafusion::{OrcReadOptions, SessionContextOrcExt}; +use datafusion_orc::{OrcReadOptions, SessionContextOrcExt}; #[tokio::main] async fn main() -> Result<()> { diff --git a/format/orc_proto.proto b/format/orc_proto.proto deleted file mode 100644 index ff716599..00000000 --- a/format/orc_proto.proto +++ /dev/null @@ -1,452 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -syntax = "proto2"; - -package orc.proto; - -option java_package = "org.apache.orc"; - -message IntegerStatistics { - optional sint64 minimum = 1; - optional sint64 maximum = 2; - optional sint64 sum = 3; -} - -message DoubleStatistics { - optional double minimum = 1; - optional double maximum = 2; - optional double sum = 3; -} - -message StringStatistics { - optional string minimum = 1; - optional string maximum = 2; - // sum will store the total length of all strings in a stripe - optional sint64 sum = 3; - // If the minimum or maximum value was longer than 1024 bytes, store a lower or upper - // bound instead of the minimum or maximum values above. 
- optional string lowerBound = 4; - optional string upperBound = 5; -} - -message BucketStatistics { - repeated uint64 count = 1 [packed=true]; -} - -message DecimalStatistics { - optional string minimum = 1; - optional string maximum = 2; - optional string sum = 3; -} - -message DateStatistics { - // min,max values saved as days since epoch - optional sint32 minimum = 1; - optional sint32 maximum = 2; -} - -message TimestampStatistics { - // min,max values saved as milliseconds since epoch - optional sint64 minimum = 1; - optional sint64 maximum = 2; - optional sint64 minimumUtc = 3; - optional sint64 maximumUtc = 4; - // store the lower 6 TS digits for min/max to achieve nanosecond precision - optional int32 minimumNanos = 5; - optional int32 maximumNanos = 6; -} - -message BinaryStatistics { - // sum will store the total binary blob length in a stripe - optional sint64 sum = 1; -} - -// Statistics for list and map -message CollectionStatistics { - optional uint64 minChildren = 1; - optional uint64 maxChildren = 2; - optional uint64 totalChildren = 3; -} - -message ColumnStatistics { - optional uint64 numberOfValues = 1; - optional IntegerStatistics intStatistics = 2; - optional DoubleStatistics doubleStatistics = 3; - optional StringStatistics stringStatistics = 4; - optional BucketStatistics bucketStatistics = 5; - optional DecimalStatistics decimalStatistics = 6; - optional DateStatistics dateStatistics = 7; - optional BinaryStatistics binaryStatistics = 8; - optional TimestampStatistics timestampStatistics = 9; - optional bool hasNull = 10; - optional uint64 bytesOnDisk = 11; - optional CollectionStatistics collectionStatistics = 12; -} - -message RowIndexEntry { - repeated uint64 positions = 1 [packed=true]; - optional ColumnStatistics statistics = 2; -} - -message RowIndex { - repeated RowIndexEntry entry = 1; -} - -message BloomFilter { - optional uint32 numHashFunctions = 1; - repeated fixed64 bitset = 2; - optional bytes utf8bitset = 3; -} - -message BloomFilterIndex { - repeated BloomFilter bloomFilter = 1; -} - -message Stream { - // if you add new index stream kinds, you need to make sure to update - // StreamName to ensure it is added to the stripe in the right area - enum Kind { - PRESENT = 0; - DATA = 1; - LENGTH = 2; - DICTIONARY_DATA = 3; - DICTIONARY_COUNT = 4; - SECONDARY = 5; - ROW_INDEX = 6; - BLOOM_FILTER = 7; - BLOOM_FILTER_UTF8 = 8; - // Virtual stream kinds to allocate space for encrypted index and data. - ENCRYPTED_INDEX = 9; - ENCRYPTED_DATA = 10; - - // stripe statistics streams - STRIPE_STATISTICS = 100; - // A virtual stream kind that is used for setting the encryption IV. 
- FILE_STATISTICS = 101; - } - optional Kind kind = 1; - optional uint32 column = 2; - optional uint64 length = 3; -} - -message ColumnEncoding { - enum Kind { - DIRECT = 0; - DICTIONARY = 1; - DIRECT_V2 = 2; - DICTIONARY_V2 = 3; - } - optional Kind kind = 1; - optional uint32 dictionarySize = 2; - - // The encoding of the bloom filters for this column: - // 0 or missing = none or original - // 1 = ORC-135 (utc for timestamps) - optional uint32 bloomEncoding = 3; -} - -message StripeEncryptionVariant { - repeated Stream streams = 1; - repeated ColumnEncoding encoding = 2; -} - -// each stripe looks like: -// index streams -// unencrypted -// variant 1..N -// data streams -// unencrypted -// variant 1..N -// footer - -message StripeFooter { - repeated Stream streams = 1; - repeated ColumnEncoding columns = 2; - optional string writerTimezone = 3; - // one for each column encryption variant - repeated StripeEncryptionVariant encryption = 4; -} - -// the file tail looks like: -// encrypted stripe statistics: ColumnarStripeStatistics (order by variant) -// stripe statistics: Metadata -// footer: Footer -// postscript: PostScript -// psLen: byte - -message StringPair { - optional string key = 1; - optional string value = 2; -} - -message Type { - enum Kind { - BOOLEAN = 0; - BYTE = 1; - SHORT = 2; - INT = 3; - LONG = 4; - FLOAT = 5; - DOUBLE = 6; - STRING = 7; - BINARY = 8; - TIMESTAMP = 9; - LIST = 10; - MAP = 11; - STRUCT = 12; - UNION = 13; - DECIMAL = 14; - DATE = 15; - VARCHAR = 16; - CHAR = 17; - TIMESTAMP_INSTANT = 18; - } - optional Kind kind = 1; - repeated uint32 subtypes = 2 [packed=true]; - repeated string fieldNames = 3; - optional uint32 maximumLength = 4; - optional uint32 precision = 5; - optional uint32 scale = 6; - repeated StringPair attributes = 7; -} - -message StripeInformation { - // the global file offset of the start of the stripe - optional uint64 offset = 1; - // the number of bytes of index - optional uint64 indexLength = 2; - // the number of bytes of data - optional uint64 dataLength = 3; - // the number of bytes in the stripe footer - optional uint64 footerLength = 4; - // the number of rows in this stripe - optional uint64 numberOfRows = 5; - // If this is present, the reader should use this value for the encryption - // stripe id for setting the encryption IV. Otherwise, the reader should - // use one larger than the previous stripe's encryptStripeId. - // For unmerged ORC files, the first stripe will use 1 and the rest of the - // stripes won't have it set. For merged files, the stripe information - // will be copied from their original files and thus the first stripe of - // each of the input files will reset it to 1. - // Note that 1 was choosen, because protobuf v3 doesn't serialize - // primitive types that are the default (eg. 0). - optional uint64 encryptStripeId = 6; - // For each encryption variant, the new encrypted local key to use - // until we find a replacement. - repeated bytes encryptedLocalKeys = 7; -} - -message UserMetadataItem { - optional string name = 1; - optional bytes value = 2; -} - -// StripeStatistics (1 per a stripe), which each contain the -// ColumnStatistics for each column. -// This message type is only used in ORC v0 and v1. -message StripeStatistics { - repeated ColumnStatistics colStats = 1; -} - -// This message type is only used in ORC v0 and v1. 
-message Metadata { - repeated StripeStatistics stripeStats = 1; -} - -// In ORC v2 (and for encrypted columns in v1), each column has -// their column statistics written separately. -message ColumnarStripeStatistics { - // one value for each stripe in the file - repeated ColumnStatistics colStats = 1; -} - -enum EncryptionAlgorithm { - UNKNOWN_ENCRYPTION = 0; // used for detecting future algorithms - AES_CTR_128 = 1; - AES_CTR_256 = 2; -} - -message FileStatistics { - repeated ColumnStatistics column = 1; -} - -// How was the data masked? This isn't necessary for reading the file, but -// is documentation about how the file was written. -message DataMask { - // the kind of masking, which may include third party masks - optional string name = 1; - // parameters for the mask - repeated string maskParameters = 2; - // the unencrypted column roots this mask was applied to - repeated uint32 columns = 3 [packed = true]; -} - -// Information about the encryption keys. -message EncryptionKey { - optional string keyName = 1; - optional uint32 keyVersion = 2; - optional EncryptionAlgorithm algorithm = 3; -} - -// The description of an encryption variant. -// Each variant is a single subtype that is encrypted with a single key. -message EncryptionVariant { - // the column id of the root - optional uint32 root = 1; - // The master key that was used to encrypt the local key, referenced as - // an index into the Encryption.key list. - optional uint32 key = 2; - // the encrypted key for the file footer - optional bytes encryptedKey = 3; - // the stripe statistics for this variant - repeated Stream stripeStatistics = 4; - // encrypted file statistics as a FileStatistics - optional bytes fileStatistics = 5; -} - -// Which KeyProvider encrypted the local keys. -enum KeyProviderKind { - UNKNOWN = 0; - HADOOP = 1; - AWS = 2; - GCP = 3; - AZURE = 4; -} - -message Encryption { - // all of the masks used in this file - repeated DataMask mask = 1; - // all of the keys used in this file - repeated EncryptionKey key = 2; - // The encrypted variants. - // Readers should prefer the first variant that the user has access to - // the corresponding key. If they don't have access to any of the keys, - // they should get the unencrypted masked data. - repeated EncryptionVariant variants = 3; - // How are the local keys encrypted? - optional KeyProviderKind keyProvider = 4; -} - -enum CalendarKind { - UNKNOWN_CALENDAR = 0; - // A hybrid Julian/Gregorian calendar with a cutover point in October 1582. - JULIAN_GREGORIAN = 1; - // A calendar that extends the Gregorian calendar back forever. - PROLEPTIC_GREGORIAN = 2; -} - -message Footer { - optional uint64 headerLength = 1; - optional uint64 contentLength = 2; - repeated StripeInformation stripes = 3; - repeated Type types = 4; - repeated UserMetadataItem metadata = 5; - optional uint64 numberOfRows = 6; - repeated ColumnStatistics statistics = 7; - optional uint32 rowIndexStride = 8; - - // Each implementation that writes ORC files should register for a code - // 0 = ORC Java - // 1 = ORC C++ - // 2 = Presto - // 3 = Scritchley Go from https://github.com/scritchley/orc - // 4 = Trino - optional uint32 writer = 9; - - // information about the encryption in this file - optional Encryption encryption = 10; - optional CalendarKind calendar = 11; - - // informative description about the version of the software that wrote - // the file. It is assumed to be within a given writer, so for example - // ORC 1.7.2 = "1.7.2". It may include suffixes, such as "-SNAPSHOT". 
- optional string softwareVersion = 12; -} - -enum CompressionKind { - NONE = 0; - ZLIB = 1; - SNAPPY = 2; - LZO = 3; - LZ4 = 4; - ZSTD = 5; -} - -// Serialized length must be less that 255 bytes -message PostScript { - optional uint64 footerLength = 1; - optional CompressionKind compression = 2; - optional uint64 compressionBlockSize = 3; - // the version of the file format - // [0, 11] = Hive 0.11 - // [0, 12] = Hive 0.12 - repeated uint32 version = 4 [packed = true]; - optional uint64 metadataLength = 5; - - // The version of the writer that wrote the file. This number is - // updated when we make fixes or large changes to the writer so that - // readers can detect whether a given bug is present in the data. - // - // Only the Java ORC writer may use values under 6 (or missing) so that - // readers that predate ORC-202 treat the new writers correctly. Each - // writer should assign their own sequence of versions starting from 6. - // - // Version of the ORC Java writer: - // 0 = original - // 1 = HIVE-8732 fixed (fixed stripe/file maximum statistics & - // string statistics use utf8 for min/max) - // 2 = HIVE-4243 fixed (use real column names from Hive tables) - // 3 = HIVE-12055 added (vectorized writer implementation) - // 4 = HIVE-13083 fixed (decimals write present stream correctly) - // 5 = ORC-101 fixed (bloom filters use utf8 consistently) - // 6 = ORC-135 fixed (timestamp statistics use utc) - // 7 = ORC-517 fixed (decimal64 min/max incorrect) - // 8 = ORC-203 added (trim very long string statistics) - // 9 = ORC-14 added (column encryption) - // - // Version of the ORC C++ writer: - // 6 = original - // - // Version of the Presto writer: - // 6 = original - // - // Version of the Scritchley Go writer: - // 6 = original - // - // Version of the Trino writer: - // 6 = original - // - optional uint32 writerVersion = 6; - - // the number of bytes in the encrypted stripe statistics - optional uint64 stripeStatisticsLength = 7; - - // Leave this last in the record - optional string magic = 8000; -} - -// The contents of the file tail that must be serialized. -// This gets serialized as part of OrcSplit, also used by footer cache. -message FileTail { - optional PostScript postscript = 1; - optional Footer footer = 2; - optional uint64 fileLength = 3; - optional uint64 postscriptLength = 4; -} diff --git a/gen/Cargo.toml b/gen/Cargo.toml deleted file mode 100644 index b9f2767e..00000000 --- a/gen/Cargo.toml +++ /dev/null @@ -1,28 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
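The proto definitions removed above document the file tail layout (footer, then PostScript, then a single psLen byte) and note that the PostScript serializes to fewer than 255 bytes. A small illustrative sketch of locating it, assuming the whole file is in memory (`postscript_slice` is a hypothetical helper, not project code):

```rust
/// Illustrative only: find the serialized PostScript bytes at the end of an
/// ORC file, using the tail layout from the removed proto comments.
fn postscript_slice(file: &[u8]) -> Option<&[u8]> {
    let ps_len = *file.last()? as usize; // the final byte stores its length
    let end = file.len() - 1; // exclude the psLen byte itself
    let start = end.checked_sub(ps_len)?;
    Some(&file[start..end])
}
```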
- -[package] -name = "gen" -description = "Code generation for datafusion-orc" -version = "0.1.0" -edition = "2021" -rust-version = "1.70" -license = "Apache-2.0" -publish = false - -[dependencies] -prost-build = { version = "=0.12.1", default-features = false } diff --git a/gen/src/main.rs b/gen/src/main.rs deleted file mode 100644 index c1cba08b..00000000 --- a/gen/src/main.rs +++ /dev/null @@ -1,46 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::fs::{remove_file, OpenOptions}; -use std::io::{Read, Write}; - -fn main() -> Result<(), Box> { - prost_build::Config::new() - .out_dir("src/") - .compile_well_known_types() - .extern_path(".google.protobuf", "::pbjson_types") - .compile_protos(&["format/orc_proto.proto"], &["format"])?; - - // read file contents to string - let mut file = OpenOptions::new().read(true).open("src/orc.proto.rs")?; - let mut buffer = String::new(); - file.read_to_string(&mut buffer)?; - // append warning that file was auto-generate - let mut file = OpenOptions::new() - .write(true) - .truncate(true) - .create(true) - .open("src/proto.rs")?; - file.write_all("// This file was automatically generated through the regen.sh script, and should not be edited.\n\n".as_bytes())?; - file.write_all(buffer.as_bytes())?; - - // since we renamed file to proto.rs to avoid period in the name - remove_file("src/orc.proto.rs")?; - - // As the proto file is checked in, the build should not fail if the file is not found - Ok(()) -} diff --git a/scripts/README.md b/scripts/README.md deleted file mode 100644 index 9fd9403e..00000000 --- a/scripts/README.md +++ /dev/null @@ -1,17 +0,0 @@ -## Generate data - -Setup the virtual environment with dependencies on PyArrow, PySpark and PyOrc -to generate the reference data: - -```bash -# Run once -./scripts/setup-venv.sh -./scripts/prepare-test-data.sh -``` - -Then execute the tests: - -```bash -cargo test -``` - diff --git a/scripts/convert_tpch.py b/scripts/convert_tpch.py deleted file mode 100644 index 524201e8..00000000 --- a/scripts/convert_tpch.py +++ /dev/null @@ -1,123 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -import pyarrow as pa -from pyarrow import orc -from pyarrow import csv - -tables = [ - "customer", - "lineitem", - "nation", - "orders", - "part", - "partsupp", - "region", - "supplier" -] - -# Datatypes based on: -# https://github.com/apache/datafusion/blob/3b93cc952b889cec2364ad2490ae18ecddb3ca49/benchmarks/src/tpch/mod.rs#L50-L134 -schemas = { - "customer": pa.schema([ - pa.field("c_custkey", pa.int64()), - pa.field("c_name", pa.string()), - pa.field("c_address", pa.string()), - pa.field("c_nationkey", pa.int64()), - pa.field("c_phone", pa.string()), - pa.field("c_acctbal", pa.decimal128(15, 2)), - pa.field("c_mktsegment", pa.string()), - pa.field("c_comment", pa.string()), - ]), - "lineitem": pa.schema([ - pa.field("l_orderkey", pa.int64()), - pa.field("l_partkey", pa.int64()), - pa.field("l_suppkey", pa.int64()), - pa.field("l_linenumber", pa.int32()), - pa.field("l_quantity", pa.decimal128(15, 2)), - pa.field("l_extendedprice", pa.decimal128(15, 2)), - pa.field("l_discount", pa.decimal128(15, 2)), - pa.field("l_tax", pa.decimal128(15, 2)), - pa.field("l_returnflag", pa.string()), - pa.field("l_linestatus", pa.string()), - pa.field("l_shipdate", pa.date32()), - pa.field("l_commitdate", pa.date32()), - pa.field("l_receiptdate", pa.date32()), - pa.field("l_shipinstruct", pa.string()), - pa.field("l_shipmode", pa.string()), - pa.field("l_comment", pa.string()), - ]), - "nation": pa.schema([ - pa.field("n_nationkey", pa.int64()), - pa.field("n_name", pa.string()), - pa.field("n_regionkey", pa.int64()), - pa.field("n_comment", pa.string()), - ]), - "orders": pa.schema([ - pa.field("o_orderkey", pa.int64()), - pa.field("o_custkey", pa.int64()), - pa.field("o_orderstatus", pa.string()), - pa.field("o_totalprice", pa.decimal128(15, 2)), - pa.field("o_orderdate", pa.date32()), - pa.field("o_orderpriority", pa.string()), - pa.field("o_clerk", pa.string()), - pa.field("o_shippriority", pa.int32()), - pa.field("o_comment", pa.string()), - ]), - "part": pa.schema([ - pa.field("p_partkey", pa.int64()), - pa.field("p_name", pa.string()), - pa.field("p_mfgr", pa.string()), - pa.field("p_brand", pa.string()), - pa.field("p_type", pa.string()), - pa.field("p_size", pa.int32()), - pa.field("p_container", pa.string()), - pa.field("p_retailprice", pa.decimal128(15, 2)), - pa.field("p_comment", pa.string()), - ]), - "partsupp": pa.schema([ - pa.field("ps_partkey", pa.int64()), - pa.field("ps_suppkey", pa.int64()), - pa.field("ps_availqty", pa.int32()), - pa.field("ps_supplycost", pa.decimal128(15, 2)), - pa.field("ps_comment", pa.string()), - ]), - "region": pa.schema([ - pa.field("r_regionkey", pa.int64()), - pa.field("r_name", pa.string()), - pa.field("r_comment", pa.string()), - ]), - "supplier": pa.schema([ - pa.field("s_suppkey", pa.int64()), - pa.field("s_name", pa.string()), - pa.field("s_address", pa.string()), - pa.field("s_nationkey", pa.int64()), - pa.field("s_phone", pa.string()), - pa.field("s_acctbal", pa.decimal128(15, 2)), - pa.field("s_comment", pa.string()), - ]), -} - -for table in tables: - schema = schemas[table] - tbl = csv.read_csv( - f"benchmark_data/{table}.tbl", - read_options=csv.ReadOptions(column_names=schema.names), - parse_options=csv.ParseOptions(delimiter="|"), - convert_options=csv.ConvertOptions(column_types=schema), - ) - orc.write_table(tbl, f"benchmark_data/{table}.orc") diff --git a/scripts/generate-tpch.sh b/scripts/generate-tpch.sh deleted file mode 100755 
index 78931e0f..00000000 --- a/scripts/generate-tpch.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) -BASE_DIR=$SCRIPT_DIR/.. -DATA_DIR=$BASE_DIR/benchmark_data -VENV_BIN=$BASE_DIR/venv/bin - -SCALE_FACTOR=${1:-1} - -# Generate TBL data -mkdir -p $DATA_DIR -docker run --rm \ - -v $DATA_DIR:/data \ - ghcr.io/scalytics/tpch-docker:main -vf -s $SCALE_FACTOR -# Removing trailing | -sed -i 's/.$//' benchmark_data/*.tbl -$VENV_BIN/python $SCRIPT_DIR/convert_tpch.py -echo "Done" diff --git a/scripts/generate_arrow.py b/scripts/generate_arrow.py deleted file mode 100644 index 6515f216..00000000 --- a/scripts/generate_arrow.py +++ /dev/null @@ -1,36 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Requires pyarrow to be installed -import glob -from pyarrow import orc, feather - -dir = "tests/integration/data" - -files = glob.glob(f"{dir}/expected/*") -files = [file.removeprefix(f"{dir}/expected/").removesuffix(".jsn.gz") for file in files] - -ignore_files = [ - "TestOrcFile.testTimestamp" # Root data type isn't struct -] - -files = [file for file in files if file not in ignore_files] - -for file in files: - print(f"Converting {file} from ORC to feather") - table = orc.read_table(f"{dir}/{file}.orc") - feather.write_feather(table, f"{dir}/expected_arrow/{file}.feather") diff --git a/scripts/generate_orc.py b/scripts/generate_orc.py deleted file mode 100644 index a962bf8d..00000000 --- a/scripts/generate_orc.py +++ /dev/null @@ -1,75 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import shutil -import glob -from datetime import date as dt -from decimal import Decimal as Dec -from pyspark.sql import SparkSession -from pyspark.sql.types import * - -dir = "tests/basic/data" - -# We're using Spark because it supports lzo compression writing -# (PyArrow supports all except lzo writing) - -spark = SparkSession.builder.getOrCreate() - -# TODO: how to do char and varchar? -# TODO: struct, list, map, union -df = spark.createDataFrame( - [ # bool, int8, int16, int32, int64, float32, float64, decimal, binary, utf8, date32 - ( None, None, None, None, None, None, None, None, None, None, None), - ( True, 0, 0, 0, 0, 0.0, 0.0, Dec(0), "".encode(), "", dt(1970, 1, 1)), - (False, 1, 1, 1, 1, 1.0, 1.0, Dec(1), "a".encode(), "a", dt(1970, 1, 2)), - (False, -1, -1, -1, -1, -1.0, -1.0, Dec(-1), " ".encode(), " ", dt(1969, 12, 31)), - ( True, 127, (1 << 15) - 1, (1 << 31) - 1, (1 << 63) - 1, float("inf"), float("inf"), Dec(123456789.12345), "encode".encode(), "encode", dt(9999, 12, 31)), - ( True, -128, -(1 << 15), -(1 << 31), -(1 << 63), float("-inf"), float("-inf"), Dec(-999999999.99999), "decode".encode(), "decode", dt(1582, 10, 15)), - ( True, 50, 50, 50, 50, 3.1415927, 3.14159265359, Dec(-31256.123), "大熊和奏".encode(), "大熊和奏", dt(1582, 10, 16)), - ( True, 51, 51, 51, 51, -3.1415927, -3.14159265359, Dec(1241000), "斉藤朱夏".encode(), "斉藤朱夏", dt(2000, 1, 1)), - ( True, 52, 52, 52, 52, 1.1, 1.1, Dec(1.1), "鈴原希実".encode(), "鈴原希実", dt(3000, 12, 31)), - (False, 53, 53, 53, 53, -1.1, -1.1, Dec(0.99999), "🤔".encode(), "🤔", dt(1900, 1, 1)), - ( None, None, None, None, None, None, None, None, None, None, None), - ], - StructType( - [ - StructField("boolean", BooleanType()), - StructField( "int8", ByteType()), - StructField( "int16", ShortType()), - StructField( "int32", IntegerType()), - StructField( "int64", LongType()), - StructField("float32", FloatType()), - StructField("float64", DoubleType()), - StructField("decimal", DecimalType(15, 5)), - StructField( "binary", BinaryType()), - StructField( "utf8", StringType()), - StructField( "date32", DateType()), - ] - ), -).coalesce(1) - -compression = ["none", "snappy", "zlib", "lzo", "zstd", "lz4"] -for c in compression: - df.write.format("orc")\ - .option("compression", c)\ - .mode("overwrite")\ - .save(f"{dir}/alltypes.{c}") - # Since Spark saves into a directory - # Move out and rename the expected single ORC file (because of coalesce above) - orc_file = glob.glob(f"{dir}/alltypes.{c}/*.orc")[0] - shutil.move(orc_file, f"{dir}/alltypes.{c}.orc") - shutil.rmtree(f"{dir}/alltypes.{c}") diff --git a/scripts/generate_orc_timestamps.py b/scripts/generate_orc_timestamps.py deleted file mode 100644 index aac13f03..00000000 --- a/scripts/generate_orc_timestamps.py +++ /dev/null @@ -1,66 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from datetime import datetime as dttm -import pyarrow as pa -from pyarrow import orc -from pyarrow import parquet -import pyorc - -dir = "tests/basic/data" - -schema = pa.schema([ - pa.field('timestamp_notz', pa.timestamp("ns")), - pa.field('timestamp_utc', pa.timestamp("ns", tz="UTC")), -]) - -# TODO test with other non-UTC timezones -arr = pa.array([ - None, - dttm(1970, 1, 1, 0, 0, 0), - dttm(1970, 1, 2, 23, 59, 59), - dttm(1969, 12, 31, 23, 59, 59), - dttm(2262, 4, 11, 11, 47, 16), - dttm(2001, 4, 13, 2, 14, 0), - dttm(2000, 1, 1, 23, 10, 10), - dttm(1900, 1, 1, 14, 25, 14), -]) -table = pa.Table.from_arrays([arr, arr], schema=schema) -orc.write_table(table, f"{dir}/pyarrow_timestamps.orc") - - -# pyarrow overflows when trying to write this, so we have to use pyorc instead -class TimestampConverter: - @staticmethod - def from_orc(obj, tz): - return obj - @staticmethod - def to_orc(obj, tz): - return obj -schema = pyorc.Struct( - id=pyorc.Int(), - timestamp=pyorc.Timestamp() -) -with open(f"{dir}/overflowing_timestamps.orc", "wb") as f: - with pyorc.Writer( - f, - schema, - converters={pyorc.TypeKind.TIMESTAMP: TimestampConverter}, - ) as writer: - writer.write((1, (12345678, 0))) - writer.write((2, (-62135596800, 0))) - writer.write((3, (12345678, 0))) diff --git a/scripts/prepare-test-data.sh b/scripts/prepare-test-data.sh deleted file mode 100755 index 45833134..00000000 --- a/scripts/prepare-test-data.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) -BASE_DIR=$SCRIPT_DIR/.. -VENV_BIN=$BASE_DIR/venv/bin - -cd $BASE_DIR -$VENV_BIN/python $SCRIPT_DIR/write.py -$VENV_BIN/python $SCRIPT_DIR/generate_orc.py -$VENV_BIN/python $SCRIPT_DIR/generate_orc_timestamps.py -$VENV_BIN/python $SCRIPT_DIR/generate_arrow.py - -echo "Done" - diff --git a/scripts/setup-venv.sh b/scripts/setup-venv.sh deleted file mode 100755 index 3e8bff7e..00000000 --- a/scripts/setup-venv.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) -BASE_DIR=$SCRIPT_DIR/.. -VENV_BIN=$BASE_DIR/venv/bin - -python3 -m venv $BASE_DIR/venv - -$VENV_BIN/pip install -U pyorc pyspark pyarrow - -echo "Done" - diff --git a/scripts/write.py b/scripts/write.py deleted file mode 100644 index e4d08ad3..00000000 --- a/scripts/write.py +++ /dev/null @@ -1,213 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
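The scripts removed in this diff generate ORC fixtures under `tests/basic/data`. A sketch of how a Rust test might consume one of them (the test body and assertion are illustrative; the path helper mirrors the deleted benchmark's, and `nested_struct.orc` is produced by the `write.py` script below):

```rust
use std::fs::File;

use orc_rust::arrow_reader::ArrowReaderBuilder;

fn basic_path(name: &str) -> String {
    format!("{}/tests/basic/data/{}", env!("CARGO_MANIFEST_DIR"), name)
}

#[test]
fn reads_generated_fixture() {
    // nested_struct.orc is one of the fixtures written by scripts/write.py.
    let f = File::open(basic_path("nested_struct.orc")).unwrap();
    let batches: Vec<_> = ArrowReaderBuilder::try_new(f)
        .unwrap()
        .build()
        .collect::<Result<_, _>>()
        .unwrap();
    assert!(!batches.is_empty());
}
```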
- -# Copied from https://github.com/DataEngineeringLabs/orc-format/blob/416490db0214fc51d53289253c0ee91f7fc9bc17/write.py -import random -import datetime -import pyorc - -dir = "tests/basic/data" - -data = { - "a": [1.0, 2.0, None, 4.0, 5.0], - "b": [True, False, None, True, False], - "str_direct": ["a", "cccccc", None, "ddd", "ee"], - "d": ["a", "bb", None, "ccc", "ddd"], - "e": ["ddd", "cc", None, "bb", "a"], - "f": ["aaaaa", "bbbbb", None, "ccccc", "ddddd"], - "int_short_repeated": [5, 5, None, 5, 5], - "int_neg_short_repeated": [-5, -5, None, -5, -5], - "int_delta": [1, 2, None, 4, 5], - "int_neg_delta": [5, 4, None, 2, 1], - "int_direct": [1, 6, None, 3, 2], - "int_neg_direct": [-1, -6, None, -3, -2], - "bigint_direct": [1, 6, None, 3, 2], - "bigint_neg_direct": [-1, -6, None, -3, -2], - "bigint_other": [5, -5, 1, 5, 5], - "utf8_increase": ["a", "bb", "ccc", "dddd", "eeeee"], - "utf8_decrease": ["eeeee", "dddd", "ccc", "bb", "a"], - "timestamp_simple": [datetime.datetime(2023, 4, 1, 20, 15, 30, 2000), datetime.datetime.fromtimestamp(int('1629617204525777000')/1000000000), datetime.datetime(2023, 1, 1), datetime.datetime(2023, 2, 1), datetime.datetime(2023, 3, 1)], - "date_simple": [datetime.date(2023, 4, 1), datetime.date(2023, 3, 1), datetime.date(2023, 1, 1), datetime.date(2023, 2, 1), datetime.date(2023, 3, 1)], - "tinyint_simple": [-1, None, 1, 127, -127] -} - -def infer_schema(data): - schema = "struct<" - for key, value in data.items(): - dt = type(value[0]) - if dt == float: - dt = "float" - elif dt == int: - dt = "int" - elif dt == bool: - dt = "boolean" - elif dt == str: - dt = "string" - elif dt == dict: - dt = infer_schema(value[0]) - elif key.startswith("timestamp"): - dt = "timestamp" - elif key.startswith("date"): - dt = "date" - else: - print(key,value,dt) - raise NotImplementedError - if key.startswith("double"): - dt = "double" - if key.startswith("bigint"): - dt = "bigint" - if key.startswith("tinyint"): - dt = "tinyint" - schema += key + ":" + dt + "," - - schema = schema[:-1] + ">" - return schema - - - -def _write( - schema: str, - data, - file_name: str, - compression=pyorc.CompressionKind.NONE, - dict_key_size_threshold=0.0, -): - output = open(file_name, "wb") - writer = pyorc.Writer( - output, - schema, - dict_key_size_threshold=dict_key_size_threshold, - # use a small number to ensure that compression crosses value boundaries - compression_block_size=32, - compression=compression, - ) - num_rows = len(list(data.values())[0]) - for x in range(num_rows): - row = tuple(values[x] for values in data.values()) - writer.write(row) - writer.close() - - with open(file_name, "rb") as f: - reader = pyorc.Reader(f) - list(reader) - -nested_struct = { - "nest": [ - (1.0,True), - (3.0,None), - (None,None), - None, - (-3.0,None) - ], -} - -_write("struct>", nested_struct, f"{dir}/nested_struct.orc") - - -nested_array = { - "value": [ - [1, None, 3, 43, 5], - [5, None, 32, 4, 15], - [16, None, 3, 4, 5, 6], - None, - [3, None], - ], -} - -_write("struct>", nested_array, f"{dir}/nested_array.orc") - - -nested_array_float = { - "value": [ - [1.0, 3.0], - [None, 2.0], - ], -} - -_write("struct>", nested_array_float, f"{dir}/nested_array_float.orc") - -nested_array_struct = { - "value": [ - [(1.0, 1, "01"), (2.0, 2, "02")], - [None, (3.0, 3, "03")], - ], -} - -_write("struct>>", nested_array_struct, f"{dir}/nested_array_struct.orc") - -nested_map = { - "map": [ - {"zero": 0, "one": 1}, - None, - {"two": 2, "tree": 3}, - {"one": 1, "two": 2, "nill": None}, - ], -} - 
-_write("struct>", nested_map, f"{dir}/nested_map.orc") - -nested_map_struct = { - "map": [ - {"01": (1.0, 1, "01"), "02": (2.0, 1, "02")}, - None, - {"03": (3.0, 3, "03"), "04": (4.0, 4, "04")}, - ], -} - -_write("struct>>", nested_map_struct, f"{dir}/nested_map_struct.orc") - - -_write( - infer_schema(data), - data, - f"{dir}/test.orc", -) - -data_boolean = { - "long": [True] * 32, -} - -_write("struct", data_boolean, f"{dir}/long_bool.orc") - -_write("struct", data_boolean, f"{dir}/long_bool_gzip.orc", pyorc.CompressionKind.ZLIB) - -data_dict = { - "dict": ["abcd", "efgh"] * 32, -} - -_write("struct", data_dict, f"{dir}/string_long.orc") - -data_dict = { - "dict": ["abc", "efgh"] * 32, -} - -_write("struct", data_dict, f"{dir}/string_dict.orc", dict_key_size_threshold=0.1) - -_write("struct", data_dict, f"{dir}/string_dict_gzip.orc", pyorc.CompressionKind.ZLIB) - -data_dict = { - "dict": ["abcd", "efgh"] * (10**4 // 2), -} - -_write("struct", data_dict, f"{dir}/string_long_long.orc") -_write("struct", data_dict, f"{dir}/string_long_long_gzip.orc", pyorc.CompressionKind.ZLIB) - -long_f32 = { - "dict": [random.uniform(0, 1) for _ in range(10**6)], -} - -_write("struct", long_f32, f"{dir}/f32_long_long_gzip.orc", pyorc.CompressionKind.ZLIB) diff --git a/src/array_decoder/decimal.rs b/src/array_decoder/decimal.rs deleted file mode 100644 index 40e766e8..00000000 --- a/src/array_decoder/decimal.rs +++ /dev/null @@ -1,156 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::cmp::Ordering; -use std::sync::Arc; - -use arrow::array::ArrayRef; -use arrow::buffer::NullBuffer; -use arrow::datatypes::Decimal128Type; -use snafu::ResultExt; - -use crate::encoding::decimal::UnboundedVarintStreamDecoder; -use crate::encoding::integer::get_rle_reader; -use crate::encoding::PrimitiveValueDecoder; -use crate::error::ArrowSnafu; -use crate::proto::stream::Kind; -use crate::stripe::Stripe; -use crate::{column::Column, error::Result}; - -use super::{ArrayBatchDecoder, PresentDecoder, PrimitiveArrayDecoder}; - -pub fn new_decimal_decoder( - column: &Column, - stripe: &Stripe, - precision: u32, - fixed_scale: u32, -) -> Result> { - let varint_iter = stripe.stream_map().get(column, Kind::Data); - let varint_iter = Box::new(UnboundedVarintStreamDecoder::new(varint_iter)); - - // Scale is specified on a per varint basis (in addition to being encoded in the type) - let scale_iter = stripe.stream_map().get(column, Kind::Secondary); - let scale_iter = get_rle_reader::(column, scale_iter)?; - - let present = PresentDecoder::from_stripe(stripe, column); - - let iter = DecimalScaleRepairDecoder { - varint_iter, - scale_iter, - fixed_scale, - }; - let iter = Box::new(iter); - - Ok(Box::new(DecimalArrayDecoder::new( - precision as u8, - fixed_scale as i8, - iter, - present, - ))) -} - -/// Wrapper around PrimitiveArrayDecoder to allow specifying the precision and scale -/// of the output decimal array. -pub struct DecimalArrayDecoder { - precision: u8, - scale: i8, - inner: PrimitiveArrayDecoder, -} - -impl DecimalArrayDecoder { - pub fn new( - precision: u8, - scale: i8, - iter: Box + Send>, - present: Option, - ) -> Self { - let inner = PrimitiveArrayDecoder::::new(iter, present); - Self { - precision, - scale, - inner, - } - } -} - -impl ArrayBatchDecoder for DecimalArrayDecoder { - fn next_batch( - &mut self, - batch_size: usize, - parent_present: Option<&NullBuffer>, - ) -> Result { - let array = self - .inner - .next_primitive_batch(batch_size, parent_present)? - .with_precision_and_scale(self.precision, self.scale) - .context(ArrowSnafu)?; - let array = Arc::new(array) as ArrayRef; - Ok(array) - } -} - -/// This iter fixes the scales of the varints decoded as scale is specified on a per -/// varint basis, and needs to align with type specified scale -struct DecimalScaleRepairDecoder { - varint_iter: Box + Send>, - scale_iter: Box + Send>, - fixed_scale: u32, -} - -impl PrimitiveValueDecoder for DecimalScaleRepairDecoder { - fn decode(&mut self, out: &mut [i128]) -> Result<()> { - // TODO: can probably optimize, reuse buffers? 
- let mut varint = vec![0; out.len()]; - let mut scale = vec![0; out.len()]; - self.varint_iter.decode(&mut varint)?; - self.scale_iter.decode(&mut scale)?; - for (index, (&varint, &scale)) in varint.iter().zip(scale.iter()).enumerate() { - out[index] = fix_i128_scale(varint, self.fixed_scale, scale); - } - Ok(()) - } -} - -fn fix_i128_scale(i: i128, fixed_scale: u32, varying_scale: i32) -> i128 { - // TODO: Verify with C++ impl in ORC repo, which does this cast - // Not sure why scale stream can be signed if it gets casted to unsigned anyway - // https://github.com/apache/orc/blob/0014bec1e4cdd1206f5bae4f5c2000b9300c6eb1/c%2B%2B/src/ColumnReader.cc#L1459-L1476 - let varying_scale = varying_scale as u32; - match fixed_scale.cmp(&varying_scale) { - Ordering::Less => { - // fixed_scale < varying_scale - // Current scale of number is greater than scale of the array type - // So need to divide to align the scale - // TODO: this differs from C++ implementation, need to verify - let scale_factor = varying_scale - fixed_scale; - // TODO: replace with lookup table? - let scale_factor = 10_i128.pow(scale_factor); - i / scale_factor - } - Ordering::Equal => i, - Ordering::Greater => { - // fixed_scale > varying_scale - // Current scale of number is smaller than scale of the array type - // So need to multiply to align the scale - // TODO: this differs from C++ implementation, need to verify - let scale_factor = fixed_scale - varying_scale; - // TODO: replace with lookup table? - let scale_factor = 10_i128.pow(scale_factor); - i * scale_factor - } - } -} diff --git a/src/array_decoder/list.rs b/src/array_decoder/list.rs deleted file mode 100644 index 7a5ccfb8..00000000 --- a/src/array_decoder/list.rs +++ /dev/null @@ -1,88 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
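The list decoder below turns the ORC LENGTH stream into Arrow list offsets with `OffsetBuffer::from_lengths`. A minimal sketch of that conversion:

```rust
use arrow::buffer::OffsetBuffer;

fn main() {
    // Row lengths 2, 0, 3 (an empty list in the middle) become the
    // cumulative offsets [0, 2, 2, 5], which ListArray::try_new below
    // consumes alongside the flattened child array.
    let offsets: OffsetBuffer<i32> = OffsetBuffer::from_lengths([2usize, 0, 3]);
    assert_eq!(offsets.iter().copied().collect::<Vec<_>>(), vec![0, 2, 2, 5]);
}
```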
- -use std::sync::Arc; - -use arrow::array::{ArrayRef, ListArray}; -use arrow::buffer::{NullBuffer, OffsetBuffer}; -use arrow::datatypes::{Field, FieldRef}; -use snafu::ResultExt; - -use crate::array_decoder::derive_present_vec; -use crate::column::Column; -use crate::encoding::integer::get_unsigned_rle_reader; -use crate::encoding::PrimitiveValueDecoder; -use crate::proto::stream::Kind; - -use crate::error::{ArrowSnafu, Result}; -use crate::stripe::Stripe; - -use super::{array_decoder_factory, ArrayBatchDecoder, PresentDecoder}; - -pub struct ListArrayDecoder { - inner: Box, - present: Option, - lengths: Box + Send>, - field: FieldRef, -} - -impl ListArrayDecoder { - pub fn new(column: &Column, field: Arc, stripe: &Stripe) -> Result { - let present = PresentDecoder::from_stripe(stripe, column); - - let child = &column.children()[0]; - let inner = array_decoder_factory(child, field.clone(), stripe)?; - - let reader = stripe.stream_map().get(column, Kind::Length); - let lengths = get_unsigned_rle_reader(column, reader); - - Ok(Self { - inner, - present, - lengths, - field, - }) - } -} - -impl ArrayBatchDecoder for ListArrayDecoder { - fn next_batch( - &mut self, - batch_size: usize, - parent_present: Option<&NullBuffer>, - ) -> Result { - let present = - derive_present_vec(&mut self.present, parent_present, batch_size).transpose()?; - - let mut lengths = vec![0; batch_size]; - if let Some(present) = &present { - self.lengths.decode_spaced(&mut lengths, present)?; - } else { - self.lengths.decode(&mut lengths)?; - } - let total_length: i64 = lengths.iter().sum(); - // Fetch child array as one Array with total_length elements - let child_array = self.inner.next_batch(total_length as usize, None)?; - let offsets = OffsetBuffer::from_lengths(lengths.into_iter().map(|l| l as usize)); - let null_buffer = present.map(NullBuffer::from); - - let array = ListArray::try_new(self.field.clone(), offsets, child_array, null_buffer) - .context(ArrowSnafu)?; - let array = Arc::new(array); - Ok(array) - } -} diff --git a/src/array_decoder/map.rs b/src/array_decoder/map.rs deleted file mode 100644 index 7c019884..00000000 --- a/src/array_decoder/map.rs +++ /dev/null @@ -1,105 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
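The map decoder below assembles keys and values into an `entries` struct and wraps it in a `MapArray`. A self-contained sketch of the same construction with hard-coded data (the "entries", "keys", and "values" field names follow the decoder's convention):

```rust
use std::sync::Arc;

use arrow::array::{Array, ArrayRef, Int32Array, MapArray, StringArray, StructArray};
use arrow::buffer::OffsetBuffer;
use arrow::datatypes::{DataType, Field, Fields};

fn main() -> Result<(), arrow::error::ArrowError> {
    // Two map rows, {"a": 1, "b": 2} and {"c": 3}: keys and values are
    // flattened child arrays, and per-row lengths become offsets.
    let keys: ArrayRef = Arc::new(StringArray::from(vec!["a", "b", "c"]));
    let values: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3]));
    let fields = Fields::from(vec![
        Field::new("keys", DataType::Utf8, false),
        Field::new("values", DataType::Int32, true),
    ]);
    let entries = StructArray::try_new(fields.clone(), vec![keys, values], None)?;
    let entries_field = Arc::new(Field::new_struct("entries", fields, false));
    let offsets = OffsetBuffer::from_lengths([2, 1]);
    let map = MapArray::try_new(entries_field, offsets, entries, None, false)?;
    assert_eq!(map.len(), 2);
    Ok(())
}
```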
- -use std::sync::Arc; - -use arrow::array::{ArrayRef, MapArray, StructArray}; -use arrow::buffer::{NullBuffer, OffsetBuffer}; -use arrow::datatypes::{Field, Fields}; -use snafu::ResultExt; - -use crate::array_decoder::derive_present_vec; -use crate::column::Column; -use crate::encoding::integer::get_unsigned_rle_reader; -use crate::encoding::PrimitiveValueDecoder; -use crate::error::{ArrowSnafu, Result}; -use crate::proto::stream::Kind; -use crate::stripe::Stripe; - -use super::{array_decoder_factory, ArrayBatchDecoder, PresentDecoder}; - -pub struct MapArrayDecoder { - keys: Box, - values: Box, - present: Option, - lengths: Box + Send>, - fields: Fields, -} - -impl MapArrayDecoder { - pub fn new( - column: &Column, - keys_field: Arc, - values_field: Arc, - stripe: &Stripe, - ) -> Result { - let present = PresentDecoder::from_stripe(stripe, column); - - let keys_column = &column.children()[0]; - let keys = array_decoder_factory(keys_column, keys_field.clone(), stripe)?; - - let values_column = &column.children()[1]; - let values = array_decoder_factory(values_column, values_field.clone(), stripe)?; - - let reader = stripe.stream_map().get(column, Kind::Length); - let lengths = get_unsigned_rle_reader(column, reader); - - let fields = Fields::from(vec![keys_field, values_field]); - - Ok(Self { - keys, - values, - present, - lengths, - fields, - }) - } -} - -impl ArrayBatchDecoder for MapArrayDecoder { - fn next_batch( - &mut self, - batch_size: usize, - parent_present: Option<&NullBuffer>, - ) -> Result { - let present = - derive_present_vec(&mut self.present, parent_present, batch_size).transpose()?; - - let mut lengths = vec![0; batch_size]; - if let Some(present) = &present { - self.lengths.decode_spaced(&mut lengths, present)?; - } else { - self.lengths.decode(&mut lengths)?; - } - let total_length: i64 = lengths.iter().sum(); - // Fetch key and value arrays, each with total_length elements - // Fetch child array as one Array with total_length elements - let keys_array = self.keys.next_batch(total_length as usize, None)?; - let values_array = self.values.next_batch(total_length as usize, None)?; - // Compose the keys + values array into a StructArray with two entries - let entries = - StructArray::try_new(self.fields.clone(), vec![keys_array, values_array], None) - .context(ArrowSnafu)?; - let offsets = OffsetBuffer::from_lengths(lengths.into_iter().map(|l| l as usize)); - - let field = Arc::new(Field::new_struct("entries", self.fields.clone(), false)); - let array = - MapArray::try_new(field, offsets, entries, present, false).context(ArrowSnafu)?; - let array = Arc::new(array); - Ok(array) - } -} diff --git a/src/array_decoder/mod.rs b/src/array_decoder/mod.rs deleted file mode 100644 index df4eedaf..00000000 --- a/src/array_decoder/mod.rs +++ /dev/null @@ -1,451 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::sync::Arc;
-
-use arrow::array::{ArrayRef, BooleanArray, BooleanBufferBuilder, PrimitiveArray};
-use arrow::buffer::NullBuffer;
-use arrow::datatypes::ArrowNativeTypeOp;
-use arrow::datatypes::ArrowPrimitiveType;
-use arrow::datatypes::{DataType as ArrowDataType, Field};
-use arrow::datatypes::{
-    Date32Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, SchemaRef,
-};
-use arrow::record_batch::{RecordBatch, RecordBatchOptions};
-use snafu::{ensure, ResultExt};
-
-use crate::column::Column;
-use crate::encoding::boolean::BooleanDecoder;
-use crate::encoding::byte::ByteRleDecoder;
-use crate::encoding::float::FloatDecoder;
-use crate::encoding::integer::get_rle_reader;
-use crate::encoding::PrimitiveValueDecoder;
-use crate::error::{
-    self, MismatchedSchemaSnafu, Result, UnexpectedSnafu, UnsupportedTypeVariantSnafu,
-};
-use crate::proto::stream::Kind;
-use crate::schema::DataType;
-use crate::stripe::Stripe;
-
-use self::decimal::new_decimal_decoder;
-use self::list::ListArrayDecoder;
-use self::map::MapArrayDecoder;
-use self::string::{new_binary_decoder, new_string_decoder};
-use self::struct_decoder::StructArrayDecoder;
-use self::timestamp::{new_timestamp_decoder, new_timestamp_instant_decoder};
-use self::union::UnionArrayDecoder;
-
-mod decimal;
-mod list;
-mod map;
-mod string;
-mod struct_decoder;
-mod timestamp;
-mod union;
-
-pub trait ArrayBatchDecoder: Send {
-    /// Used as the base for decoding ORC columns into Arrow arrays. The input `batch_size`
-    /// specifies the upper limit on the number of values returned in the output array.
-    ///
-    /// If a parent nested type (e.g. Struct) indicates a null in its PRESENT stream,
-    /// then the child doesn't have a value (similar to other nullability). So we need
-    /// to take care to insert these null values, as Arrow requires the child to hold
-    /// data in the null slot of the child.
- // TODO: encode nullability in generic -> for a given column in a stripe, we will always know - // upfront if we need to bother with nulls or not, so we don't need to keep checking this - // for every invocation of next_batch - // NOTE: null parent may have non-null child, so would still have to account for this - fn next_batch( - &mut self, - batch_size: usize, - parent_present: Option<&NullBuffer>, - ) -> Result; -} - -struct PrimitiveArrayDecoder { - iter: Box + Send>, - present: Option, -} - -impl PrimitiveArrayDecoder { - pub fn new( - iter: Box + Send>, - present: Option, - ) -> Self { - Self { iter, present } - } - - fn next_primitive_batch( - &mut self, - batch_size: usize, - parent_present: Option<&NullBuffer>, - ) -> Result> { - let present = - derive_present_vec(&mut self.present, parent_present, batch_size).transpose()?; - let mut data = vec![T::Native::ZERO; batch_size]; - match present { - Some(present) => { - self.iter.decode_spaced(data.as_mut_slice(), &present)?; - let array = PrimitiveArray::::new(data.into(), Some(present)); - Ok(array) - } - None => { - self.iter.decode(data.as_mut_slice())?; - let array = PrimitiveArray::::from_iter_values(data); - Ok(array) - } - } - } -} - -impl ArrayBatchDecoder for PrimitiveArrayDecoder { - fn next_batch( - &mut self, - batch_size: usize, - parent_present: Option<&NullBuffer>, - ) -> Result { - let array = self.next_primitive_batch(batch_size, parent_present)?; - let array = Arc::new(array) as ArrayRef; - Ok(array) - } -} - -type Int64ArrayDecoder = PrimitiveArrayDecoder; -type Int32ArrayDecoder = PrimitiveArrayDecoder; -type Int16ArrayDecoder = PrimitiveArrayDecoder; -type Int8ArrayDecoder = PrimitiveArrayDecoder; -type Float32ArrayDecoder = PrimitiveArrayDecoder; -type Float64ArrayDecoder = PrimitiveArrayDecoder; -type DateArrayDecoder = PrimitiveArrayDecoder; // TODO: does ORC encode as i64 or i32? 
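// These decoders share one nested-nullability rule, described in the trait doc
// above: a child decodes values only for slots its parent marks valid, and those
// bits must then be scattered back out to full batch positions. A simplified
// sketch of that expansion (mirroring the `merge_parent_present` helper below;
// `expand_child_present` is an illustrative name):
use arrow::array::BooleanBufferBuilder;
use arrow::buffer::NullBuffer;

fn expand_child_present(parent: &NullBuffer, child_packed: &[bool]) -> NullBuffer {
    // `child_packed` holds one bit per *valid* parent slot.
    assert_eq!(child_packed.len(), parent.len() - parent.null_count());
    let mut builder = BooleanBufferBuilder::new(parent.len());
    // Slots the parent marked null stay null in the child.
    builder.append_n(parent.len(), false);
    for (idx, &valid) in parent.valid_indices().zip(child_packed) {
        builder.set_bit(idx, valid);
    }
    NullBuffer::from(builder.finish())
}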
- -struct BooleanArrayDecoder { - iter: Box + Send>, - present: Option, -} - -impl BooleanArrayDecoder { - pub fn new( - iter: Box + Send>, - present: Option, - ) -> Self { - Self { iter, present } - } -} - -impl ArrayBatchDecoder for BooleanArrayDecoder { - fn next_batch( - &mut self, - batch_size: usize, - parent_present: Option<&NullBuffer>, - ) -> Result { - let present = - derive_present_vec(&mut self.present, parent_present, batch_size).transpose()?; - let mut data = vec![false; batch_size]; - let array = match present { - Some(present) => { - self.iter.decode_spaced(data.as_mut_slice(), &present)?; - BooleanArray::new(data.into(), Some(present)) - } - None => { - self.iter.decode(data.as_mut_slice())?; - BooleanArray::from(data) - } - }; - Ok(Arc::new(array)) - } -} - -struct PresentDecoder { - // TODO: ideally directly reference BooleanDecoder, doing this way to avoid - // the generic propagation that would be required (BooleanDecoder) - inner: Box + Send>, -} - -impl PresentDecoder { - fn from_stripe(stripe: &Stripe, column: &Column) -> Option { - stripe - .stream_map() - .get_opt(column, Kind::Present) - .map(|stream| { - let inner = Box::new(BooleanDecoder::new(stream)); - PresentDecoder { inner } - }) - } - - fn next_buffer(&mut self, size: usize) -> Result { - let mut data = vec![false; size]; - self.inner.decode(&mut data)?; - Ok(NullBuffer::from(data)) - } -} - -fn merge_parent_present( - parent_present: &NullBuffer, - present: Result, -) -> Result { - let present = present?; - let non_null_count = parent_present.len() - parent_present.null_count(); - debug_assert!(present.len() == non_null_count); - let mut builder = BooleanBufferBuilder::new(parent_present.len()); - builder.append_n(parent_present.len(), false); - for (idx, p) in parent_present.valid_indices().zip(present.iter()) { - builder.set_bit(idx, p); - } - Ok(builder.finish().into()) -} - -fn derive_present_vec( - present: &mut Option, - parent_present: Option<&NullBuffer>, - batch_size: usize, -) -> Option> { - match (present, parent_present) { - (Some(present), Some(parent_present)) => { - let element_count = parent_present.len() - parent_present.null_count(); - let present = present.next_buffer(element_count); - Some(merge_parent_present(parent_present, present)) - } - (Some(present), None) => Some(present.next_buffer(batch_size)), - (None, Some(parent_present)) => Some(Ok(parent_present.clone())), - (None, None) => None, - } -} - -pub struct NaiveStripeDecoder { - stripe: Stripe, - schema_ref: SchemaRef, - decoders: Vec>, - index: usize, - batch_size: usize, - number_of_rows: usize, -} - -impl Iterator for NaiveStripeDecoder { - type Item = Result; - - fn next(&mut self) -> Option { - if self.index < self.number_of_rows { - let record = self - .decode_next_batch(self.number_of_rows - self.index) - .transpose()?; - self.index += self.batch_size; - Some(record) - } else { - None - } - } -} - -pub fn array_decoder_factory( - column: &Column, - field: Arc, - stripe: &Stripe, -) -> Result> { - let decoder: Box = match (column.data_type(), field.data_type()) { - // TODO: try make branches more generic, reduce duplication - (DataType::Boolean { .. }, ArrowDataType::Boolean) => { - let iter = stripe.stream_map().get(column, Kind::Data); - let iter = Box::new(BooleanDecoder::new(iter)); - let present = PresentDecoder::from_stripe(stripe, column); - Box::new(BooleanArrayDecoder::new(iter, present)) - } - (DataType::Byte { .. 
}, ArrowDataType::Int8) => { - let iter = stripe.stream_map().get(column, Kind::Data); - let iter = Box::new(ByteRleDecoder::new(iter)); - let present = PresentDecoder::from_stripe(stripe, column); - Box::new(Int8ArrayDecoder::new(iter, present)) - } - (DataType::Short { .. }, ArrowDataType::Int16) => { - let iter = stripe.stream_map().get(column, Kind::Data); - let iter = get_rle_reader(column, iter)?; - let present = PresentDecoder::from_stripe(stripe, column); - Box::new(Int16ArrayDecoder::new(iter, present)) - } - (DataType::Int { .. }, ArrowDataType::Int32) => { - let iter = stripe.stream_map().get(column, Kind::Data); - let iter = get_rle_reader(column, iter)?; - let present = PresentDecoder::from_stripe(stripe, column); - Box::new(Int32ArrayDecoder::new(iter, present)) - } - (DataType::Long { .. }, ArrowDataType::Int64) => { - let iter = stripe.stream_map().get(column, Kind::Data); - let iter = get_rle_reader(column, iter)?; - let present = PresentDecoder::from_stripe(stripe, column); - Box::new(Int64ArrayDecoder::new(iter, present)) - } - (DataType::Float { .. }, ArrowDataType::Float32) => { - let iter = stripe.stream_map().get(column, Kind::Data); - let iter = Box::new(FloatDecoder::new(iter)); - let present = PresentDecoder::from_stripe(stripe, column); - Box::new(Float32ArrayDecoder::new(iter, present)) - } - (DataType::Double { .. }, ArrowDataType::Float64) => { - let iter = stripe.stream_map().get(column, Kind::Data); - let iter = Box::new(FloatDecoder::new(iter)); - let present = PresentDecoder::from_stripe(stripe, column); - Box::new(Float64ArrayDecoder::new(iter, present)) - } - (DataType::String { .. }, ArrowDataType::Utf8) - | (DataType::Varchar { .. }, ArrowDataType::Utf8) - | (DataType::Char { .. }, ArrowDataType::Utf8) => new_string_decoder(column, stripe)?, - (DataType::Binary { .. }, ArrowDataType::Binary) => new_binary_decoder(column, stripe)?, - ( - DataType::Decimal { - precision, scale, .. - }, - ArrowDataType::Decimal128(a_precision, a_scale), - ) if *precision as u8 == *a_precision && *scale as i8 == *a_scale => { - new_decimal_decoder(column, stripe, *precision, *scale)? - } - (DataType::Timestamp { .. }, field_type) => { - new_timestamp_decoder(column, field_type.clone(), stripe)? - } - (DataType::TimestampWithLocalTimezone { .. }, field_type) => { - new_timestamp_instant_decoder(column, field_type.clone(), stripe)? - } - (DataType::Date { .. }, ArrowDataType::Date32) => { - // TODO: allow Date64 - let iter = stripe.stream_map().get(column, Kind::Data); - let iter = get_rle_reader(column, iter)?; - let present = PresentDecoder::from_stripe(stripe, column); - Box::new(DateArrayDecoder::new(iter, present)) - } - (DataType::Struct { .. }, ArrowDataType::Struct(fields)) => { - Box::new(StructArrayDecoder::new(column, fields.clone(), stripe)?) - } - (DataType::List { .. }, ArrowDataType::List(field)) => { - // TODO: add support for ArrowDataType::LargeList - Box::new(ListArrayDecoder::new(column, field.clone(), stripe)?) - } - (DataType::Map { .. }, ArrowDataType::Map(entries, sorted)) => { - ensure!(!sorted, UnsupportedTypeVariantSnafu { msg: "Sorted map" }); - let ArrowDataType::Struct(entries) = entries.data_type() else { - UnexpectedSnafu { - msg: "arrow Map with non-Struct entry type".to_owned(), - } - .fail()? 
- }; - ensure!( - entries.len() == 2, - UnexpectedSnafu { - msg: format!( - "arrow Map with {} columns per entry (expected 2)", - entries.len() - ) - } - ); - let keys_field = entries[0].clone(); - let values_field = entries[1].clone(); - - Box::new(MapArrayDecoder::new( - column, - keys_field, - values_field, - stripe, - )?) - } - (DataType::Union { .. }, ArrowDataType::Union(fields, _)) => { - Box::new(UnionArrayDecoder::new(column, fields.clone(), stripe)?) - } - (data_type, field_type) => { - return MismatchedSchemaSnafu { - orc_type: data_type.clone(), - arrow_type: field_type.clone(), - } - .fail() - } - }; - - Ok(decoder) -} - -impl NaiveStripeDecoder { - fn inner_decode_next_batch(&mut self, remaining: usize) -> Result> { - let chunk = self.batch_size.min(remaining); - - let mut fields = Vec::with_capacity(self.stripe.columns().len()); - - for decoder in &mut self.decoders { - let array = decoder.next_batch(chunk, None)?; - if array.is_empty() { - break; - } else { - fields.push(array); - } - } - - Ok(fields) - } - - fn decode_next_batch(&mut self, remaining: usize) -> Result> { - let fields = self.inner_decode_next_batch(remaining)?; - - if fields.is_empty() { - if remaining == 0 { - Ok(None) - } else { - // In case of empty projection, we need to create a RecordBatch with `row_count` only - // to reflect the row number - Ok(Some( - RecordBatch::try_new_with_options( - Arc::clone(&self.schema_ref), - fields, - &RecordBatchOptions::new() - .with_row_count(Some(self.batch_size.min(remaining))), - ) - .context(error::ConvertRecordBatchSnafu)?, - )) - } - } else { - //TODO(weny): any better way? - let fields = self - .schema_ref - .fields - .into_iter() - .map(|field| field.name()) - .zip(fields) - .collect::>(); - - Ok(Some( - RecordBatch::try_from_iter(fields).context(error::ConvertRecordBatchSnafu)?, - )) - } - } - - pub fn new(stripe: Stripe, schema_ref: SchemaRef, batch_size: usize) -> Result { - let mut decoders = Vec::with_capacity(stripe.columns().len()); - let number_of_rows = stripe.number_of_rows(); - - for (col, field) in stripe - .columns() - .iter() - .zip(schema_ref.fields.iter().cloned()) - { - let decoder = array_decoder_factory(col, field, &stripe)?; - decoders.push(decoder); - } - - Ok(Self { - stripe, - schema_ref, - decoders, - index: 0, - batch_size, - number_of_rows, - }) - } -} diff --git a/src/array_decoder/string.rs b/src/array_decoder/string.rs deleted file mode 100644 index dda72f07..00000000 --- a/src/array_decoder/string.rs +++ /dev/null @@ -1,195 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
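// The removed `decode_next_batch` above handles an easy-to-miss case: with an
// empty projection there are no arrays to return, but the batch must still report
// how many rows it represents. Arrow supports this via a row-count-only
// RecordBatch; a minimal sketch (`rows_only_batch` is an illustrative name):
use std::sync::Arc;

use arrow::datatypes::Schema;
use arrow::record_batch::{RecordBatch, RecordBatchOptions};

fn rows_only_batch(num_rows: usize) -> RecordBatch {
    RecordBatch::try_new_with_options(
        Arc::new(Schema::empty()),
        vec![], // no columns selected
        &RecordBatchOptions::new().with_row_count(Some(num_rows)),
    )
    .expect("row-count-only batch")
}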
- -use std::io::Read; -use std::marker::PhantomData; -use std::sync::Arc; - -use arrow::array::{ArrayRef, DictionaryArray, GenericByteArray, StringArray}; -use arrow::buffer::{Buffer, NullBuffer, OffsetBuffer}; -use arrow::compute::kernels::cast; -use arrow::datatypes::{ByteArrayType, DataType, GenericBinaryType, GenericStringType}; -use snafu::ResultExt; - -use crate::array_decoder::derive_present_vec; -use crate::column::Column; -use crate::compression::Decompressor; -use crate::encoding::integer::get_unsigned_rle_reader; -use crate::encoding::PrimitiveValueDecoder; -use crate::error::{ArrowSnafu, IoSnafu, Result}; -use crate::proto::column_encoding::Kind as ColumnEncodingKind; -use crate::proto::stream::Kind; -use crate::stripe::Stripe; - -use super::{ArrayBatchDecoder, Int64ArrayDecoder, PresentDecoder}; - -// TODO: reduce duplication with string below -pub fn new_binary_decoder(column: &Column, stripe: &Stripe) -> Result> { - let present = PresentDecoder::from_stripe(stripe, column); - - let lengths = stripe.stream_map().get(column, Kind::Length); - let lengths = get_unsigned_rle_reader(column, lengths); - - let bytes = Box::new(stripe.stream_map().get(column, Kind::Data)); - Ok(Box::new(BinaryArrayDecoder::new(bytes, lengths, present))) -} - -pub fn new_string_decoder(column: &Column, stripe: &Stripe) -> Result> { - let kind = column.encoding().kind(); - let present = PresentDecoder::from_stripe(stripe, column); - - let lengths = stripe.stream_map().get(column, Kind::Length); - let lengths = get_unsigned_rle_reader(column, lengths); - - match kind { - ColumnEncodingKind::Direct | ColumnEncodingKind::DirectV2 => { - let bytes = Box::new(stripe.stream_map().get(column, Kind::Data)); - Ok(Box::new(DirectStringArrayDecoder::new( - bytes, lengths, present, - ))) - } - ColumnEncodingKind::Dictionary | ColumnEncodingKind::DictionaryV2 => { - let bytes = Box::new(stripe.stream_map().get(column, Kind::DictionaryData)); - // TODO: is this always guaranteed to be set for all dictionaries? 
- let dictionary_size = column.dictionary_size(); - // We assume here we have fetched all the dictionary strings (according to size above) - let dictionary_strings = DirectStringArrayDecoder::new(bytes, lengths, None) - .next_byte_batch(dictionary_size, None)?; - let dictionary_strings = Arc::new(dictionary_strings); - - let indexes = stripe.stream_map().get(column, Kind::Data); - let indexes = get_unsigned_rle_reader(column, indexes); - let indexes = Int64ArrayDecoder::new(indexes, present); - - Ok(Box::new(DictionaryStringArrayDecoder::new( - indexes, - dictionary_strings, - )?)) - } - } -} - -// TODO: check this offset size type -pub type DirectStringArrayDecoder = GenericByteArrayDecoder>; -pub type BinaryArrayDecoder = GenericByteArrayDecoder>; - -pub struct GenericByteArrayDecoder { - bytes: Box, - lengths: Box + Send>, - present: Option, - phantom: PhantomData, -} - -impl GenericByteArrayDecoder { - fn new( - bytes: Box, - lengths: Box + Send>, - present: Option, - ) -> Self { - Self { - bytes, - lengths, - present, - phantom: Default::default(), - } - } - - fn next_byte_batch( - &mut self, - batch_size: usize, - parent_present: Option<&NullBuffer>, - ) -> Result> { - let present = - derive_present_vec(&mut self.present, parent_present, batch_size).transpose()?; - - let mut lengths = vec![0; batch_size]; - if let Some(present) = &present { - self.lengths.decode_spaced(&mut lengths, present)?; - } else { - self.lengths.decode(&mut lengths)?; - } - let total_length: i64 = lengths.iter().sum(); - // Fetch all data bytes at once - let mut bytes = Vec::with_capacity(total_length as usize); - self.bytes - .by_ref() - .take(total_length as u64) - .read_to_end(&mut bytes) - .context(IoSnafu)?; - let bytes = Buffer::from(bytes); - let offsets = - OffsetBuffer::::from_lengths(lengths.into_iter().map(|l| l as usize)); - - let null_buffer = match present { - // Edge case where keys of map cannot have a null buffer - Some(present) if present.null_count() == 0 => None, - _ => present, - }; - let array = - GenericByteArray::::try_new(offsets, bytes, null_buffer).context(ArrowSnafu)?; - Ok(array) - } -} - -impl ArrayBatchDecoder for GenericByteArrayDecoder { - fn next_batch( - &mut self, - batch_size: usize, - parent_present: Option<&NullBuffer>, - ) -> Result { - let array = self.next_byte_batch(batch_size, parent_present)?; - let array = Arc::new(array) as ArrayRef; - Ok(array) - } -} - -pub struct DictionaryStringArrayDecoder { - indexes: Int64ArrayDecoder, - dictionary: Arc, -} - -impl DictionaryStringArrayDecoder { - fn new(indexes: Int64ArrayDecoder, dictionary: Arc) -> Result { - Ok(Self { - indexes, - dictionary, - }) - } -} - -impl ArrayBatchDecoder for DictionaryStringArrayDecoder { - fn next_batch( - &mut self, - batch_size: usize, - parent_present: Option<&NullBuffer>, - ) -> Result { - let keys = self - .indexes - .next_primitive_batch(batch_size, parent_present)?; - // TODO: ORC spec states: For dictionary encodings the dictionary is sorted - // (in lexicographical order of bytes in the UTF-8 encodings). - // So we can set the is_ordered property here? - let array = DictionaryArray::try_new(keys, self.dictionary.clone()).context(ArrowSnafu)?; - // Cast back to StringArray to ensure all stripes have consistent datatype - // TODO: Is there anyway to preserve the dictionary encoding? - // This costs performance. 
- let array = cast(&array, &DataType::Utf8).context(ArrowSnafu)?; - - let array = Arc::new(array); - Ok(array) - } -} diff --git a/src/array_decoder/struct_decoder.rs b/src/array_decoder/struct_decoder.rs deleted file mode 100644 index b09bb3b7..00000000 --- a/src/array_decoder/struct_decoder.rs +++ /dev/null @@ -1,79 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::sync::Arc; - -use arrow::{ - array::{ArrayRef, StructArray}, - buffer::NullBuffer, - datatypes::Fields, -}; -use snafu::ResultExt; - -use crate::error::Result; -use crate::stripe::Stripe; -use crate::{column::Column, error::ArrowSnafu}; - -use super::{array_decoder_factory, derive_present_vec, ArrayBatchDecoder, PresentDecoder}; - -pub struct StructArrayDecoder { - fields: Fields, - decoders: Vec>, - present: Option, -} - -impl StructArrayDecoder { - pub fn new(column: &Column, fields: Fields, stripe: &Stripe) -> Result { - let present = PresentDecoder::from_stripe(stripe, column); - - let decoders = column - .children() - .iter() - .zip(fields.iter().cloned()) - .map(|(child, field)| array_decoder_factory(child, field, stripe)) - .collect::>>()?; - - Ok(Self { - decoders, - present, - fields, - }) - } -} - -impl ArrayBatchDecoder for StructArrayDecoder { - fn next_batch( - &mut self, - batch_size: usize, - parent_present: Option<&NullBuffer>, - ) -> Result { - let present = - derive_present_vec(&mut self.present, parent_present, batch_size).transpose()?; - - let child_arrays = self - .decoders - .iter_mut() - .map(|child| child.next_batch(batch_size, present.as_ref())) - .collect::>>()?; - - let null_buffer = present.map(NullBuffer::from); - let array = StructArray::try_new(self.fields.clone(), child_arrays, null_buffer) - .context(ArrowSnafu)?; - let array = Arc::new(array); - Ok(array) - } -} diff --git a/src/array_decoder/timestamp.rs b/src/array_decoder/timestamp.rs deleted file mode 100644 index b6c45b5a..00000000 --- a/src/array_decoder/timestamp.rs +++ /dev/null @@ -1,314 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::sync::Arc; - -use crate::{ - array_decoder::ArrowDataType, - column::Column, - encoding::{ - integer::{get_rle_reader, get_unsigned_rle_reader}, - timestamp::{TimestampDecoder, TimestampNanosecondAsDecimalDecoder}, - PrimitiveValueDecoder, - }, - error::{MismatchedSchemaSnafu, Result}, - proto::stream::Kind, - stripe::Stripe, -}; -use arrow::datatypes::{ - ArrowTimestampType, Decimal128Type, DecimalType, TimeUnit, TimestampMicrosecondType, - TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, -}; -use arrow::{array::ArrayRef, buffer::NullBuffer}; -use chrono::offset::TimeZone; -use chrono::TimeDelta; -use chrono_tz::{Tz, UTC}; - -use super::{ - decimal::DecimalArrayDecoder, ArrayBatchDecoder, PresentDecoder, PrimitiveArrayDecoder, -}; -use crate::error::UnsupportedTypeVariantSnafu; - -const NANOSECONDS_IN_SECOND: i128 = 1_000_000_000; -const NANOSECOND_DIGITS: i8 = 9; - -/// Seconds from ORC epoch of 1 January 2015, which serves as the 0 -/// point for all timestamp values, to the UNIX epoch of 1 January 1970. -const ORC_EPOCH_UTC_SECONDS_SINCE_UNIX_EPOCH: i64 = 1_420_070_400; - -fn get_inner_timestamp_decoder( - column: &Column, - stripe: &Stripe, - seconds_since_unix_epoch: i64, -) -> Result> { - let data = stripe.stream_map().get(column, Kind::Data); - let data = get_rle_reader(column, data)?; - - let secondary = stripe.stream_map().get(column, Kind::Secondary); - let secondary = get_unsigned_rle_reader(column, secondary); - - let present = PresentDecoder::from_stripe(stripe, column); - - let iter = Box::new(TimestampDecoder::::new( - seconds_since_unix_epoch, - data, - secondary, - )); - Ok(PrimitiveArrayDecoder::::new(iter, present)) -} - -fn get_timestamp_decoder( - column: &Column, - stripe: &Stripe, - seconds_since_unix_epoch: i64, -) -> Result> { - let inner = get_inner_timestamp_decoder::(column, stripe, seconds_since_unix_epoch)?; - match stripe.writer_tz() { - Some(writer_tz) => Ok(Box::new(TimestampOffsetArrayDecoder { inner, writer_tz })), - None => Ok(Box::new(inner)), - } -} - -fn get_timestamp_instant_decoder( - column: &Column, - stripe: &Stripe, -) -> Result> { - // TIMESTAMP_INSTANT is encoded as UTC so we don't check writer timezone in stripe - let inner = - get_inner_timestamp_decoder::(column, stripe, ORC_EPOCH_UTC_SECONDS_SINCE_UNIX_EPOCH)?; - Ok(Box::new(TimestampInstantArrayDecoder(inner))) -} - -fn decimal128_decoder( - column: &Column, - stripe: &Stripe, - seconds_since_unix_epoch: i64, - writer_tz: Option, -) -> Result { - let data = stripe.stream_map().get(column, Kind::Data); - let data = get_rle_reader(column, data)?; - - let secondary = stripe.stream_map().get(column, Kind::Secondary); - let secondary = get_rle_reader(column, secondary)?; - - let present = PresentDecoder::from_stripe(stripe, column); - - let iter = TimestampNanosecondAsDecimalDecoder::new(seconds_since_unix_epoch, data, secondary); - - let iter: Box + Send> = match writer_tz { - Some(UTC) | None => Box::new(iter), - Some(writer_tz) => Box::new(TimestampNanosecondAsDecimalWithTzDecoder(iter, writer_tz)), - }; - - Ok(DecimalArrayDecoder::new( - Decimal128Type::MAX_PRECISION, - NANOSECOND_DIGITS, - iter, - present, - )) -} - -/// Decodes a TIMESTAMP column stripe into batches of Timestamp{Nano,Micro,Milli,}secondArrays -/// with no timezone. Will convert timestamps from writer timezone to UTC if a writer timezone -/// is specified for the stripe. 
-pub fn new_timestamp_decoder( - column: &Column, - field_type: ArrowDataType, - stripe: &Stripe, -) -> Result> { - let seconds_since_unix_epoch = match stripe.writer_tz() { - Some(writer_tz) => { - // If writer timezone exists then we must take the ORC epoch according - // to that timezone, and find seconds since UTC UNIX epoch for the base. - writer_tz - .with_ymd_and_hms(2015, 1, 1, 0, 0, 0) - .unwrap() - .timestamp() - } - None => { - // No writer timezone, we can assume UTC, so we can use known fixed value - // for the base offset. - ORC_EPOCH_UTC_SECONDS_SINCE_UNIX_EPOCH - } - }; - - match field_type { - ArrowDataType::Timestamp(TimeUnit::Second, None) => { - get_timestamp_decoder::(column, stripe, seconds_since_unix_epoch) - } - ArrowDataType::Timestamp(TimeUnit::Millisecond, None) => { - get_timestamp_decoder::( - column, - stripe, - seconds_since_unix_epoch, - ) - } - ArrowDataType::Timestamp(TimeUnit::Microsecond, None) => { - get_timestamp_decoder::( - column, - stripe, - seconds_since_unix_epoch, - ) - } - ArrowDataType::Timestamp(TimeUnit::Nanosecond, None) => { - get_timestamp_decoder::( - column, - stripe, - seconds_since_unix_epoch, - ) - } - ArrowDataType::Decimal128(Decimal128Type::MAX_PRECISION, NANOSECOND_DIGITS) => { - Ok(Box::new(decimal128_decoder( - column, - stripe, - seconds_since_unix_epoch, - stripe.writer_tz(), - )?)) - } - _ => MismatchedSchemaSnafu { - orc_type: column.data_type().clone(), - arrow_type: field_type, - } - .fail(), - } -} - -/// Decodes a TIMESTAMP_INSTANT column stripe into batches of -/// Timestamp{Nano,Micro,Milli,}secondArrays with UTC timezone. -pub fn new_timestamp_instant_decoder( - column: &Column, - field_type: ArrowDataType, - stripe: &Stripe, -) -> Result> { - match field_type { - ArrowDataType::Timestamp(TimeUnit::Second, Some(tz)) if tz.as_ref() == "UTC" => { - get_timestamp_instant_decoder::(column, stripe) - } - ArrowDataType::Timestamp(TimeUnit::Millisecond, Some(tz)) if tz.as_ref() == "UTC" => { - get_timestamp_instant_decoder::(column, stripe) - } - ArrowDataType::Timestamp(TimeUnit::Microsecond, Some(tz)) if tz.as_ref() == "UTC" => { - get_timestamp_instant_decoder::(column, stripe) - } - ArrowDataType::Timestamp(TimeUnit::Nanosecond, Some(tz)) if tz.as_ref() == "UTC" => { - get_timestamp_instant_decoder::(column, stripe) - } - ArrowDataType::Timestamp(_, Some(_)) => UnsupportedTypeVariantSnafu { - msg: "Non-UTC Arrow timestamps", - } - .fail(), - ArrowDataType::Decimal128(Decimal128Type::MAX_PRECISION, NANOSECOND_DIGITS) => { - Ok(Box::new(decimal128_decoder( - column, - stripe, - ORC_EPOCH_UTC_SECONDS_SINCE_UNIX_EPOCH, - None, - )?)) - } - _ => MismatchedSchemaSnafu { - orc_type: column.data_type().clone(), - arrow_type: field_type, - } - .fail()?, - } -} - -/// Wrapper around PrimitiveArrayDecoder to decode timestamps which are encoded in -/// timezone of the writer to their UTC value. -struct TimestampOffsetArrayDecoder { - inner: PrimitiveArrayDecoder, - writer_tz: chrono_tz::Tz, -} - -impl ArrayBatchDecoder for TimestampOffsetArrayDecoder { - fn next_batch( - &mut self, - batch_size: usize, - parent_present: Option<&NullBuffer>, - ) -> Result { - let array = self - .inner - .next_primitive_batch(batch_size, parent_present)?; - - let convert_timezone = |ts| { - // Convert from writer timezone to reader timezone (which we default to UTC) - // TODO: more efficient way of doing this? 
- self.writer_tz - .timestamp_nanos(ts) - .naive_local() - .and_utc() - .timestamp_nanos_opt() - }; - let array = array - // first try to convert all non-nullable batches to non-nullable batches - .try_unary::<_, T, _>(|ts| convert_timezone(ts).ok_or(())) - // in the rare case one of the values was out of the timeunit's range (eg. see - // ), - // for nanoseconds), try again by allowing a nullable batch as output - .unwrap_or_else(|()| array.unary_opt::<_, T>(convert_timezone)); - let array = Arc::new(array) as ArrayRef; - Ok(array) - } -} - -/// Wrapper around PrimitiveArrayDecoder to allow specifying the timezone of the output -/// timestamp array as UTC. -struct TimestampInstantArrayDecoder(PrimitiveArrayDecoder); - -impl ArrayBatchDecoder for TimestampInstantArrayDecoder { - fn next_batch( - &mut self, - batch_size: usize, - parent_present: Option<&NullBuffer>, - ) -> Result { - let array = self - .0 - .next_primitive_batch(batch_size, parent_present)? - .with_timezone("UTC"); - let array = Arc::new(array) as ArrayRef; - Ok(array) - } -} - -struct TimestampNanosecondAsDecimalWithTzDecoder(TimestampNanosecondAsDecimalDecoder, Tz); - -impl TimestampNanosecondAsDecimalWithTzDecoder { - fn next_inner(&self, ts: i128) -> i128 { - let seconds = ts.div_euclid(NANOSECONDS_IN_SECOND); - let nanoseconds = ts.rem_euclid(NANOSECONDS_IN_SECOND); - - // The addition may panic, because chrono stores dates in an i32, - // which can be overflowed with an i64 of seconds. - let dt = (self.1.timestamp_nanos(0) - + TimeDelta::new(seconds as i64, nanoseconds as u32) - .expect("TimeDelta duration out of bound")) - .naive_local() - .and_utc(); - - (dt.timestamp() as i128) * NANOSECONDS_IN_SECOND + (dt.timestamp_subsec_nanos() as i128) - } -} - -impl PrimitiveValueDecoder for TimestampNanosecondAsDecimalWithTzDecoder { - fn decode(&mut self, out: &mut [i128]) -> Result<()> { - self.0.decode(out)?; - for x in out.iter_mut() { - *x = self.next_inner(*x); - } - Ok(()) - } -} diff --git a/src/array_decoder/union.rs b/src/array_decoder/union.rs deleted file mode 100644 index 39af4eac..00000000 --- a/src/array_decoder/union.rs +++ /dev/null @@ -1,137 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
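// The removed timestamp decoders above anchor all values to the ORC epoch
// (2015-01-01) as observed in the stripe's writer timezone. A small sketch of
// that base-offset computation using `chrono`/`chrono-tz` (`orc_epoch_seconds`
// is an illustrative name):
use chrono::offset::TimeZone;
use chrono_tz::Tz;

/// Seconds from the UNIX epoch to the ORC epoch as seen in `writer_tz`.
fn orc_epoch_seconds(writer_tz: Tz) -> i64 {
    writer_tz
        .with_ymd_and_hms(2015, 1, 1, 0, 0, 0)
        .unwrap()
        .timestamp()
}

#[test]
fn orc_epoch_bases() {
    // In UTC this matches the fixed constant used when no writer timezone is set.
    assert_eq!(orc_epoch_seconds(chrono_tz::UTC), 1_420_070_400);
    // A zone east of UTC reaches 2015-01-01 earlier in absolute time.
    assert!(orc_epoch_seconds(chrono_tz::Asia::Tokyo) < 1_420_070_400);
}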
- -use std::sync::Arc; - -use arrow::array::{ArrayRef, BooleanBufferBuilder, UnionArray}; -use arrow::buffer::{Buffer, NullBuffer}; -use arrow::datatypes::UnionFields; -use snafu::ResultExt; - -use crate::column::Column; -use crate::encoding::byte::ByteRleDecoder; -use crate::encoding::PrimitiveValueDecoder; -use crate::error::ArrowSnafu; -use crate::error::Result; -use crate::proto::stream::Kind; -use crate::stripe::Stripe; - -use super::{array_decoder_factory, derive_present_vec, ArrayBatchDecoder, PresentDecoder}; - -/// Decode ORC Union column into batches of Arrow Sparse UnionArrays. -pub struct UnionArrayDecoder { - // fields and variants should have same length - // TODO: encode this assumption into types - fields: UnionFields, - variants: Vec>, - tags: Box + Send>, - present: Option, -} - -impl UnionArrayDecoder { - pub fn new(column: &Column, fields: UnionFields, stripe: &Stripe) -> Result { - let present = PresentDecoder::from_stripe(stripe, column); - - let tags = stripe.stream_map().get(column, Kind::Data); - let tags = Box::new(ByteRleDecoder::new(tags)); - - let variants = column - .children() - .iter() - .zip(fields.iter()) - .map(|(child, (_id, field))| array_decoder_factory(child, field.clone(), stripe)) - .collect::>>()?; - - Ok(Self { - fields, - variants, - tags, - present, - }) - } -} - -impl ArrayBatchDecoder for UnionArrayDecoder { - fn next_batch( - &mut self, - batch_size: usize, - parent_present: Option<&NullBuffer>, - ) -> Result { - let present = - derive_present_vec(&mut self.present, parent_present, batch_size).transpose()?; - let mut tags = vec![0; batch_size]; - match &present { - Some(present) => { - // Since UnionArrays don't have nullability, we rely on child arrays. - // So we default to first child (tag 0) for any nulls from this parent Union. - self.tags.decode_spaced(&mut tags, present)?; - } - None => { - self.tags.decode(&mut tags)?; - } - } - - // Calculate nullability for children - let mut children_nullability = (0..self.variants.len()) - .map(|index| { - let mut child_present = BooleanBufferBuilder::new(batch_size); - child_present.append_n(batch_size, false); - for idx in tags - .iter() - .enumerate() - // Where the parent expects the value of the child, we set to non-null. - // Otherwise for the sparse spots, we leave as null in children. - .filter_map(|(idx, &tag)| (tag as usize == index).then_some(idx)) - { - child_present.set_bit(idx, true); - } - child_present - }) - .collect::>(); - // If parent says a slot is null, we need to ensure the first child (0-index) also - // encodes this information, since as mentioned before, Arrow UnionArrays don't store - // nullability and rely on their children. We default to first child to encode this - // information so need to enforce that here. 
-        if let Some(present) = &present {
-            let first_child = &mut children_nullability[0];
-            for idx in present
-                .iter()
-                .enumerate()
-                .filter_map(|(idx, parent_present)| (!parent_present).then_some(idx))
-            {
-                first_child.set_bit(idx, false);
-            }
-        }
-
-        let child_arrays = self
-            .variants
-            .iter_mut()
-            .zip(children_nullability)
-            .map(|(decoder, mut present)| {
-                let present = NullBuffer::from(present.finish());
-                decoder.next_batch(batch_size, Some(&present))
-            })
-            .collect::<Result<Vec<_>>>()?;
-
-        // Currently default to decoding as Sparse UnionArray so no value offsets
-        let type_ids = Buffer::from_vec(tags.clone()).into();
-        let array = UnionArray::try_new(self.fields.clone(), type_ids, None, child_arrays)
-            .context(ArrowSnafu)?;
-        let array = Arc::new(array);
-        Ok(array)
-    }
-}
diff --git a/src/arrow_reader.rs b/src/arrow_reader.rs
deleted file mode 100644
index fd0eb53d..00000000
--- a/src/arrow_reader.rs
+++ /dev/null
@@ -1,227 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::collections::HashMap;
-use std::ops::Range;
-use std::sync::Arc;
-
-use arrow::datatypes::SchemaRef;
-use arrow::error::ArrowError;
-use arrow::record_batch::{RecordBatch, RecordBatchReader};
-
-use crate::array_decoder::NaiveStripeDecoder;
-use crate::error::Result;
-use crate::projection::ProjectionMask;
-use crate::reader::metadata::{read_metadata, FileMetadata};
-use crate::reader::ChunkReader;
-use crate::schema::RootDataType;
-use crate::stripe::{Stripe, StripeMetadata};
-
-const DEFAULT_BATCH_SIZE: usize = 8192;
-
-pub struct ArrowReaderBuilder<R> {
-    pub(crate) reader: R,
-    pub(crate) file_metadata: Arc<FileMetadata>,
-    pub(crate) batch_size: usize,
-    pub(crate) projection: ProjectionMask,
-    pub(crate) schema_ref: Option<SchemaRef>,
-    pub(crate) file_byte_range: Option<Range<usize>>,
-}
-
-impl<R> ArrowReaderBuilder<R> {
-    pub(crate) fn new(reader: R, file_metadata: Arc<FileMetadata>) -> Self {
-        Self {
-            reader,
-            file_metadata,
-            batch_size: DEFAULT_BATCH_SIZE,
-            projection: ProjectionMask::all(),
-            schema_ref: None,
-            file_byte_range: None,
-        }
-    }
-
-    pub fn file_metadata(&self) -> &FileMetadata {
-        &self.file_metadata
-    }
-
-    pub fn with_batch_size(mut self, batch_size: usize) -> Self {
-        self.batch_size = batch_size;
-        self
-    }
-
-    pub fn with_projection(mut self, projection: ProjectionMask) -> Self {
-        self.projection = projection;
-        self
-    }
-
-    pub fn with_schema(mut self, schema: SchemaRef) -> Self {
-        self.schema_ref = Some(schema);
-        self
-    }
-
-    /// Specifies a byte range of the file; only stripes whose offsets fall within
-    /// this range will be read
-    pub fn with_file_byte_range(mut self, range: Range<usize>) -> Self {
-        self.file_byte_range = Some(range);
-        self
-    }
-
-    /// Returns the currently computed schema
-    ///
-    /// Unless [`with_schema`](Self::with_schema) was called, this is computed dynamically
-    ///
based on the current projection and the underlying file format. - pub fn schema(&self) -> SchemaRef { - let projected_data_type = self - .file_metadata - .root_data_type() - .project(&self.projection); - let metadata = self - .file_metadata - .user_custom_metadata() - .iter() - .map(|(key, value)| (key.clone(), String::from_utf8_lossy(value).to_string())) - .collect::>(); - self.schema_ref - .clone() - .unwrap_or_else(|| Arc::new(projected_data_type.create_arrow_schema(&metadata))) - } -} - -impl ArrowReaderBuilder { - pub fn try_new(mut reader: R) -> Result { - let file_metadata = Arc::new(read_metadata(&mut reader)?); - Ok(Self::new(reader, file_metadata)) - } - - pub fn build(self) -> ArrowReader { - let schema_ref = self.schema(); - let projected_data_type = self - .file_metadata - .root_data_type() - .project(&self.projection); - let cursor = Cursor { - reader: self.reader, - file_metadata: self.file_metadata, - projected_data_type, - stripe_index: 0, - file_byte_range: self.file_byte_range, - }; - ArrowReader { - cursor, - schema_ref, - current_stripe: None, - batch_size: self.batch_size, - } - } -} - -pub struct ArrowReader { - cursor: Cursor, - schema_ref: SchemaRef, - current_stripe: Option> + Send>>, - batch_size: usize, -} - -impl ArrowReader { - pub fn total_row_count(&self) -> u64 { - self.cursor.file_metadata.number_of_rows() - } -} - -impl ArrowReader { - fn try_advance_stripe(&mut self) -> Result, ArrowError> { - let stripe = self.cursor.next().transpose()?; - match stripe { - Some(stripe) => { - let decoder = - NaiveStripeDecoder::new(stripe, self.schema_ref.clone(), self.batch_size)?; - self.current_stripe = Some(Box::new(decoder)); - self.next().transpose() - } - None => Ok(None), - } - } -} - -impl RecordBatchReader for ArrowReader { - fn schema(&self) -> SchemaRef { - self.schema_ref.clone() - } -} - -impl Iterator for ArrowReader { - type Item = Result; - - fn next(&mut self) -> Option { - match self.current_stripe.as_mut() { - Some(stripe) => { - match stripe - .next() - .map(|batch| batch.map_err(|err| ArrowError::ExternalError(Box::new(err)))) - { - Some(rb) => Some(rb), - None => self.try_advance_stripe().transpose(), - } - } - None => self.try_advance_stripe().transpose(), - } - } -} - -pub(crate) struct Cursor { - pub reader: R, - pub file_metadata: Arc, - pub projected_data_type: RootDataType, - pub stripe_index: usize, - pub file_byte_range: Option>, -} - -impl Cursor { - fn get_stripe_metadatas(&self) -> Vec { - if let Some(range) = self.file_byte_range.clone() { - self.file_metadata - .stripe_metadatas() - .iter() - .filter(|info| { - let offset = info.offset() as usize; - range.contains(&offset) - }) - .map(|info| info.to_owned()) - .collect::>() - } else { - self.file_metadata.stripe_metadatas().to_vec() - } - } -} - -impl Iterator for Cursor { - type Item = Result; - - fn next(&mut self) -> Option { - self.get_stripe_metadatas() - .get(self.stripe_index) - .map(|info| { - let stripe = Stripe::new( - &mut self.reader, - &self.file_metadata, - &self.projected_data_type.clone(), - info, - ); - self.stripe_index += 1; - stripe - }) - } -} diff --git a/src/arrow_writer.rs b/src/arrow_writer.rs deleted file mode 100644 index a8b1dd95..00000000 --- a/src/arrow_writer.rs +++ /dev/null @@ -1,477 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::io::Write;
-
-use arrow::{
-    array::RecordBatch,
-    datatypes::{DataType as ArrowDataType, SchemaRef},
-};
-use prost::Message;
-use snafu::{ensure, ResultExt};
-
-use crate::{
-    error::{IoSnafu, Result, UnexpectedSnafu},
-    memory::EstimateMemory,
-    proto,
-    writer::stripe::{StripeInformation, StripeWriter},
-};
-
-/// Construct an [`ArrowWriter`] to encode [`RecordBatch`]es into a single
-/// ORC file.
-pub struct ArrowWriterBuilder<W> {
-    writer: W,
-    schema: SchemaRef,
-    batch_size: usize,
-    stripe_byte_size: usize,
-}
-
-impl<W: Write> ArrowWriterBuilder<W> {
-    /// Create a new [`ArrowWriterBuilder`], which will write an ORC file to
-    /// the provided writer, with the expected Arrow schema.
-    pub fn new(writer: W, schema: SchemaRef) -> Self {
-        Self {
-            writer,
-            schema,
-            batch_size: 1024,
-            // 64 MiB
-            stripe_byte_size: 64 * 1024 * 1024,
-        }
-    }
-
-    /// Batch size controls the encoding behaviour, where `batch_size` values
-    /// are encoded at a time. Default is `1024`.
-    pub fn with_batch_size(mut self, batch_size: usize) -> Self {
-        self.batch_size = batch_size;
-        self
-    }
-
-    /// The approximate size of stripes. Default is `64MiB`.
-    pub fn with_stripe_byte_size(mut self, stripe_byte_size: usize) -> Self {
-        self.stripe_byte_size = stripe_byte_size;
-        self
-    }
-
-    /// Construct an [`ArrowWriter`] ready to encode [`RecordBatch`]es into
-    /// an ORC file.
-    pub fn try_build(mut self) -> Result<ArrowWriter<W>> {
-        // Required magic "ORC" bytes at start of file
-        self.writer.write_all(b"ORC").context(IoSnafu)?;
-        let writer = StripeWriter::new(self.writer, &self.schema);
-        Ok(ArrowWriter {
-            writer,
-            schema: self.schema,
-            batch_size: self.batch_size,
-            stripe_byte_size: self.stripe_byte_size,
-            written_stripes: vec![],
-            // Accounting for the 3 magic bytes above
-            total_bytes_written: 3,
-        })
-    }
-}
-
-/// Encodes [`RecordBatch`]es into an ORC file. Will encode `batch_size` rows
-/// at a time into a stripe, flushing the stripe to the underlying writer when
-/// its estimated memory footprint exceeds the configured `stripe_byte_size`.
-pub struct ArrowWriter<W: Write> {
-    writer: StripeWriter<W>,
-    schema: SchemaRef,
-    batch_size: usize,
-    stripe_byte_size: usize,
-    written_stripes: Vec<StripeInformation>,
-    /// Used to keep track of progress in the file so far (instead of needing Seek on the writer)
-    total_bytes_written: u64,
-}
-
-impl<W: Write> ArrowWriter<W> {
-    /// Encode the provided batch, `batch_size` rows at a time, flushing any
-    /// stripes that exceed the configured stripe size.
- pub fn write(&mut self, batch: &RecordBatch) -> Result<()> { - ensure!( - batch.schema() == self.schema, - UnexpectedSnafu { - msg: "RecordBatch doesn't match expected schema" - } - ); - - for offset in (0..batch.num_rows()).step_by(self.batch_size) { - let length = self.batch_size.min(batch.num_rows() - offset); - let batch = batch.slice(offset, length); - self.writer.encode_batch(&batch)?; - - // TODO: be able to flush whilst writing a batch (instead of between batches) - // Flush stripe when it exceeds estimated configured size - if self.writer.estimate_memory_size() > self.stripe_byte_size { - self.flush_stripe()?; - } - } - Ok(()) - } - - /// Flush any buffered data that hasn't been written, and write the stripe - /// footer metadata. - pub fn flush_stripe(&mut self) -> Result<()> { - let info = self.writer.finish_stripe(self.total_bytes_written)?; - self.total_bytes_written += info.total_byte_size(); - self.written_stripes.push(info); - Ok(()) - } - - /// Flush the current stripe if it is still in progress, and write the tail - /// metadata and close the writer. - pub fn close(mut self) -> Result<()> { - // Flush in-progress stripe - if self.writer.row_count > 0 { - self.flush_stripe()?; - } - let footer = serialize_footer(&self.written_stripes, &self.schema); - let footer = footer.encode_to_vec(); - let postscript = serialize_postscript(footer.len() as u64); - let postscript = postscript.encode_to_vec(); - let postscript_len = postscript.len() as u8; - - let mut writer = self.writer.finish(); - writer.write_all(&footer).context(IoSnafu)?; - writer.write_all(&postscript).context(IoSnafu)?; - // Postscript length as last byte - writer.write_all(&[postscript_len]).context(IoSnafu)?; - - // TODO: return file metadata - Ok(()) - } -} - -fn serialize_schema(schema: &SchemaRef) -> Vec { - let mut types = vec![]; - - let field_names = schema - .fields() - .iter() - .map(|f| f.name().to_owned()) - .collect(); - // TODO: consider nested types - let subtypes = (1..(schema.fields().len() as u32 + 1)).collect(); - let root_type = proto::Type { - kind: Some(proto::r#type::Kind::Struct.into()), - subtypes, - field_names, - maximum_length: None, - precision: None, - scale: None, - attributes: vec![], - }; - types.push(root_type); - for field in schema.fields() { - let t = match field.data_type() { - ArrowDataType::Float32 => proto::Type { - kind: Some(proto::r#type::Kind::Float.into()), - ..Default::default() - }, - ArrowDataType::Float64 => proto::Type { - kind: Some(proto::r#type::Kind::Double.into()), - ..Default::default() - }, - ArrowDataType::Int8 => proto::Type { - kind: Some(proto::r#type::Kind::Byte.into()), - ..Default::default() - }, - ArrowDataType::Int16 => proto::Type { - kind: Some(proto::r#type::Kind::Short.into()), - ..Default::default() - }, - ArrowDataType::Int32 => proto::Type { - kind: Some(proto::r#type::Kind::Int.into()), - ..Default::default() - }, - ArrowDataType::Int64 => proto::Type { - kind: Some(proto::r#type::Kind::Long.into()), - ..Default::default() - }, - ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 => proto::Type { - kind: Some(proto::r#type::Kind::String.into()), - ..Default::default() - }, - ArrowDataType::Binary | ArrowDataType::LargeBinary => proto::Type { - kind: Some(proto::r#type::Kind::Binary.into()), - ..Default::default() - }, - ArrowDataType::Boolean => proto::Type { - kind: Some(proto::r#type::Kind::Boolean.into()), - ..Default::default() - }, - // TODO: support more types - _ => unimplemented!("unsupported datatype"), - }; - types.push(t); - } - 
types -} - -fn serialize_footer(stripes: &[StripeInformation], schema: &SchemaRef) -> proto::Footer { - let body_length = stripes - .iter() - .map(|s| s.index_length + s.data_length + s.footer_length) - .sum::(); - let number_of_rows = stripes.iter().map(|s| s.row_count as u64).sum::(); - let stripes = stripes.iter().map(From::from).collect(); - let types = serialize_schema(schema); - proto::Footer { - header_length: Some(3), - content_length: Some(body_length + 3), - stripes, - types, - metadata: vec![], - number_of_rows: Some(number_of_rows), - statistics: vec![], - row_index_stride: None, - writer: Some(u32::MAX), - encryption: None, - calendar: None, - software_version: None, - } -} - -fn serialize_postscript(footer_length: u64) -> proto::PostScript { - proto::PostScript { - footer_length: Some(footer_length), - compression: Some(proto::CompressionKind::None.into()), // TODO: support compression - compression_block_size: None, - version: vec![0, 12], - metadata_length: Some(0), // TODO: statistics - writer_version: Some(u32::MAX), // TODO: check which version to use - stripe_statistics_length: None, - magic: Some("ORC".to_string()), - } -} - -#[cfg(test)] -mod tests { - use std::sync::Arc; - - use arrow::{ - array::{ - Array, BinaryArray, BooleanArray, Float32Array, Float64Array, Int16Array, Int32Array, - Int64Array, Int8Array, LargeBinaryArray, LargeStringArray, RecordBatchReader, - StringArray, - }, - compute::concat_batches, - datatypes::{DataType as ArrowDataType, Field, Schema}, - }; - use bytes::Bytes; - - use crate::ArrowReaderBuilder; - - use super::*; - - fn roundtrip(batches: &[RecordBatch]) -> Vec { - let mut f = vec![]; - let mut writer = ArrowWriterBuilder::new(&mut f, batches[0].schema()) - .try_build() - .unwrap(); - for batch in batches { - writer.write(batch).unwrap(); - } - writer.close().unwrap(); - - let f = Bytes::from(f); - let reader = ArrowReaderBuilder::try_new(f).unwrap().build(); - reader.collect::, _>>().unwrap() - } - - #[test] - fn test_roundtrip_write() { - let f32_array = Arc::new(Float32Array::from(vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0])); - let f64_array = Arc::new(Float64Array::from(vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0])); - let int8_array = Arc::new(Int8Array::from(vec![0, 1, 2, 3, 4, 5, 6])); - let int16_array = Arc::new(Int16Array::from(vec![0, 1, 2, 3, 4, 5, 6])); - let int32_array = Arc::new(Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6])); - let int64_array = Arc::new(Int64Array::from(vec![0, 1, 2, 3, 4, 5, 6])); - let utf8_array = Arc::new(StringArray::from(vec![ - "Hello", - "there", - "楡井希実", - "💯", - "ORC", - "", - "123", - ])); - let binary_array = Arc::new(BinaryArray::from(vec![ - "Hello".as_bytes(), - "there".as_bytes(), - "楡井希実".as_bytes(), - "💯".as_bytes(), - "ORC".as_bytes(), - "".as_bytes(), - "123".as_bytes(), - ])); - let boolean_array = Arc::new(BooleanArray::from(vec![ - true, false, true, false, true, true, false, - ])); - let schema = Schema::new(vec![ - Field::new("f32", ArrowDataType::Float32, false), - Field::new("f64", ArrowDataType::Float64, false), - Field::new("int8", ArrowDataType::Int8, false), - Field::new("int16", ArrowDataType::Int16, false), - Field::new("int32", ArrowDataType::Int32, false), - Field::new("int64", ArrowDataType::Int64, false), - Field::new("utf8", ArrowDataType::Utf8, false), - Field::new("binary", ArrowDataType::Binary, false), - Field::new("boolean", ArrowDataType::Boolean, false), - ]); - - let batch = RecordBatch::try_new( - Arc::new(schema), - vec![ - f32_array, - f64_array, - int8_array, - 
int16_array, - int32_array, - int64_array, - utf8_array, - binary_array, - boolean_array, - ], - ) - .unwrap(); - - let rows = roundtrip(&[batch.clone()]); - assert_eq!(batch, rows[0]); - } - - #[test] - fn test_roundtrip_write_large_type() { - let large_utf8_array = Arc::new(LargeStringArray::from(vec![ - "Hello", - "there", - "楡井希実", - "💯", - "ORC", - "", - "123", - ])); - let large_binary_array = Arc::new(LargeBinaryArray::from(vec![ - "Hello".as_bytes(), - "there".as_bytes(), - "楡井希実".as_bytes(), - "💯".as_bytes(), - "ORC".as_bytes(), - "".as_bytes(), - "123".as_bytes(), - ])); - let schema = Schema::new(vec![ - Field::new("large_utf8", ArrowDataType::LargeUtf8, false), - Field::new("large_binary", ArrowDataType::LargeBinary, false), - ]); - let batch = - RecordBatch::try_new(Arc::new(schema), vec![large_utf8_array, large_binary_array]) - .unwrap(); - - let rows = roundtrip(&[batch]); - - // Currently we read all String/Binary columns from ORC as plain StringArray/BinaryArray - let utf8_array = Arc::new(StringArray::from(vec![ - "Hello", - "there", - "楡井希実", - "💯", - "ORC", - "", - "123", - ])); - let binary_array = Arc::new(BinaryArray::from(vec![ - "Hello".as_bytes(), - "there".as_bytes(), - "楡井希実".as_bytes(), - "💯".as_bytes(), - "ORC".as_bytes(), - "".as_bytes(), - "123".as_bytes(), - ])); - let schema = Schema::new(vec![ - Field::new("large_utf8", ArrowDataType::Utf8, false), - Field::new("large_binary", ArrowDataType::Binary, false), - ]); - let batch = RecordBatch::try_new(Arc::new(schema), vec![utf8_array, binary_array]).unwrap(); - assert_eq!(batch, rows[0]); - } - - #[test] - fn test_write_small_stripes() { - // Set small stripe size to ensure writing across multiple stripes works - let data: Vec = (0..1_000_000).collect(); - let int64_array = Arc::new(Int64Array::from(data)); - let schema = Schema::new(vec![Field::new("int64", ArrowDataType::Int64, true)]); - - let batch = RecordBatch::try_new(Arc::new(schema), vec![int64_array]).unwrap(); - - let mut f = vec![]; - let mut writer = ArrowWriterBuilder::new(&mut f, batch.schema()) - .with_stripe_byte_size(256) - .try_build() - .unwrap(); - writer.write(&batch).unwrap(); - writer.close().unwrap(); - - let f = Bytes::from(f); - let reader = ArrowReaderBuilder::try_new(f).unwrap().build(); - let schema = reader.schema(); - // Current reader doesn't read a batch across stripe boundaries, so we expect - // more than one batch to prove multiple stripes are being written here - let rows = reader.collect::, _>>().unwrap(); - assert!( - rows.len() > 1, - "must have written more than 1 stripe (each stripe read as separate recordbatch)" - ); - let actual = concat_batches(&schema, rows.iter()).unwrap(); - assert_eq!(batch, actual); - } - - #[test] - fn test_write_inconsistent_null_buffers() { - // When writing arrays where null buffer can appear/disappear between writes - let schema = Arc::new(Schema::new(vec![Field::new( - "int64", - ArrowDataType::Int64, - true, - )])); - - // Ensure first batch has array with no null buffer - let array_no_nulls = Arc::new(Int64Array::from(vec![1, 2, 3])); - assert!(array_no_nulls.nulls().is_none()); - // But subsequent batch has array with null buffer - let array_with_nulls = Arc::new(Int64Array::from(vec![None, Some(4), None])); - assert!(array_with_nulls.nulls().is_some()); - - let batch1 = RecordBatch::try_new(schema.clone(), vec![array_no_nulls]).unwrap(); - let batch2 = RecordBatch::try_new(schema.clone(), vec![array_with_nulls]).unwrap(); - - // ORC writer should be able to handle this gracefully - 
let expected_array = Arc::new(Int64Array::from(vec![ - Some(1), - Some(2), - Some(3), - None, - Some(4), - None, - ])); - let expected_batch = RecordBatch::try_new(schema, vec![expected_array]).unwrap(); - - let rows = roundtrip(&[batch1, batch2]); - assert_eq!(expected_batch, rows[0]); - } -} diff --git a/src/async_arrow_reader.rs b/src/async_arrow_reader.rs deleted file mode 100644 index 94a0565d..00000000 --- a/src/async_arrow_reader.rs +++ /dev/null @@ -1,228 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::fmt::Formatter; -use std::pin::Pin; -use std::sync::Arc; -use std::task::{Context, Poll}; - -use arrow::datatypes::SchemaRef; -use arrow::error::ArrowError; -use arrow::record_batch::RecordBatch; -use futures::future::BoxFuture; -use futures::{ready, Stream}; -use futures_util::FutureExt; - -use crate::array_decoder::NaiveStripeDecoder; -use crate::arrow_reader::Cursor; -use crate::error::Result; -use crate::reader::metadata::read_metadata_async; -use crate::reader::AsyncChunkReader; -use crate::stripe::{Stripe, StripeMetadata}; -use crate::ArrowReaderBuilder; - -type BoxedDecoder = Box> + Send>; - -enum StreamState { - /// At the start of a new row group, or the end of the file stream - Init, - /// Decoding a batch - Decoding(BoxedDecoder), - /// Reading data from input - Reading(BoxFuture<'static, Result<(StripeFactory, Option)>>), - /// Error - Error, -} - -impl std::fmt::Debug for StreamState { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match self { - StreamState::Init => write!(f, "StreamState::Init"), - StreamState::Decoding(_) => write!(f, "StreamState::Decoding"), - StreamState::Reading(_) => write!(f, "StreamState::Reading"), - StreamState::Error => write!(f, "StreamState::Error"), - } - } -} - -impl From> for StripeFactory { - fn from(c: Cursor) -> Self { - Self { - inner: c, - is_end: false, - } - } -} - -struct StripeFactory { - inner: Cursor, - is_end: bool, -} - -pub struct ArrowStreamReader { - factory: Option>>, - batch_size: usize, - schema_ref: SchemaRef, - state: StreamState, -} - -impl StripeFactory { - async fn read_next_stripe_inner(&mut self, info: &StripeMetadata) -> Result { - let inner = &mut self.inner; - - inner.stripe_index += 1; - - Stripe::new_async( - &mut inner.reader, - &inner.file_metadata, - &inner.projected_data_type, - info, - ) - .await - } - - async fn read_next_stripe(mut self) -> Result<(Self, Option)> { - let info = self - .inner - .file_metadata - .stripe_metadatas() - .get(self.inner.stripe_index) - .cloned(); - - if let Some(info) = info { - if let Some(range) = self.inner.file_byte_range.clone() { - let offset = info.offset() as usize; - if !range.contains(&offset) { - self.inner.stripe_index += 1; - return Ok((self, None)); - } - } - match 
self.read_next_stripe_inner(&info).await { - Ok(stripe) => Ok((self, Some(stripe))), - Err(err) => Err(err), - } - } else { - self.is_end = true; - Ok((self, None)) - } - } -} - -impl ArrowStreamReader { - pub(crate) fn new(cursor: Cursor, batch_size: usize, schema_ref: SchemaRef) -> Self { - Self { - factory: Some(Box::new(cursor.into())), - batch_size, - schema_ref, - state: StreamState::Init, - } - } - - pub fn schema(&self) -> SchemaRef { - self.schema_ref.clone() - } - - fn poll_next_inner( - mut self: Pin<&mut Self>, - cx: &mut Context<'_>, - ) -> Poll>> { - loop { - match &mut self.state { - StreamState::Decoding(decoder) => match decoder.next() { - Some(Ok(batch)) => { - return Poll::Ready(Some(Ok(batch))); - } - Some(Err(e)) => { - self.state = StreamState::Error; - return Poll::Ready(Some(Err(e))); - } - None => self.state = StreamState::Init, - }, - StreamState::Init => { - let factory = self.factory.take().expect("lost factory"); - if factory.is_end { - return Poll::Ready(None); - } - - let fut = factory.read_next_stripe().boxed(); - - self.state = StreamState::Reading(fut) - } - StreamState::Reading(f) => match ready!(f.poll_unpin(cx)) { - Ok((factory, Some(stripe))) => { - self.factory = Some(Box::new(factory)); - match NaiveStripeDecoder::new( - stripe, - self.schema_ref.clone(), - self.batch_size, - ) { - Ok(decoder) => { - self.state = StreamState::Decoding(Box::new(decoder)); - } - Err(e) => { - self.state = StreamState::Error; - return Poll::Ready(Some(Err(e))); - } - } - } - Ok((factory, None)) => { - self.factory = Some(Box::new(factory)); - // All rows skipped, read next row group - self.state = StreamState::Init; - } - Err(e) => { - self.state = StreamState::Error; - return Poll::Ready(Some(Err(e))); - } - }, - StreamState::Error => return Poll::Ready(None), // Ends the stream as error happens. - } - } - } -} - -impl Stream for ArrowStreamReader { - type Item = Result; - - fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - self.poll_next_inner(cx) - .map_err(|e| ArrowError::ExternalError(Box::new(e))) - } -} - -impl ArrowReaderBuilder { - pub async fn try_new_async(mut reader: R) -> Result { - let file_metadata = Arc::new(read_metadata_async(&mut reader).await?); - Ok(Self::new(reader, file_metadata)) - } - - pub fn build_async(self) -> ArrowStreamReader { - let projected_data_type = self - .file_metadata() - .root_data_type() - .project(&self.projection); - let schema_ref = self.schema(); - let cursor = Cursor { - reader: self.reader, - file_metadata: self.file_metadata, - projected_data_type, - stripe_index: 0, - file_byte_range: self.file_byte_range, - }; - ArrowStreamReader::new(cursor, self.batch_size, schema_ref) - } -} diff --git a/src/bin/orc-export.rs b/src/bin/orc-export.rs deleted file mode 100644 index 257c03f7..00000000 --- a/src/bin/orc-export.rs +++ /dev/null @@ -1,151 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
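For context on the async API removed just above: a minimal usage sketch of how ArrowStreamReader was typically driven. This assumes a Tokio runtime and that tokio::fs::File satisfies the crate's AsyncChunkReader bound, as in the crate's async examples; the helper name and path argument are illustrative only, not part of this diff.

use arrow::record_batch::RecordBatch;
use futures_util::TryStreamExt;
use orc_rust::ArrowReaderBuilder;

// Hypothetical helper (not crate API): stream a whole ORC file into memory.
async fn read_all(path: &str) -> Result<Vec<RecordBatch>, Box<dyn std::error::Error>> {
    let f = tokio::fs::File::open(path).await?;
    // try_new_async reads the file metadata up front; build_async yields a
    // Stream of Result<RecordBatch, ArrowError>, decoding one stripe at a time.
    let stream = ArrowReaderBuilder::try_new_async(f).await?.build_async();
    Ok(stream.try_collect().await?)
}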
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::{fs::File, io, path::PathBuf}; - -use anyhow::Result; -use arrow::{array::RecordBatch, csv, datatypes::DataType, error::ArrowError, json}; -use clap::{Parser, ValueEnum}; -use json::writer::{JsonFormat, LineDelimited}; -use orc_rust::{projection::ProjectionMask, reader::metadata::read_metadata, ArrowReaderBuilder}; - -#[derive(Parser)] -#[command(name = "orc-export")] -#[command(version, about = "Export data from orc file to csv", long_about = None)] -struct Cli { - /// Path to the orc file - file: PathBuf, - /// Output file. If not provided output will be printed on console - #[arg(short, long)] - output_file: Option, - /// Output format. If not provided then the output is csv - #[arg(value_enum, short, long, default_value_t = FileFormat::Csv)] - format: FileFormat, - /// export only first N records - #[arg(short, long, value_name = "N")] - num_rows: Option, - /// export only provided columns. Comma separated list - #[arg(short, long, value_delimiter = ',')] - columns: Option>, -} - -#[derive(Clone, Debug, PartialEq, ValueEnum)] -enum FileFormat { - /// Output data in csv format - Csv, - /// Output data in json format - Json, -} - -#[allow(clippy::large_enum_variant)] -enum OutputWriter { - Csv(csv::Writer), - Json(json::Writer), -} - -impl OutputWriter -where - W: io::Write, - F: JsonFormat, -{ - fn write(&mut self, batch: &RecordBatch) -> Result<(), ArrowError> { - match self { - OutputWriter::Csv(w) => w.write(batch), - OutputWriter::Json(w) => w.write(batch), - } - } - - fn finish(&mut self) -> Result<(), ArrowError> { - match self { - OutputWriter::Csv(_) => Ok(()), - OutputWriter::Json(w) => w.finish(), - } - } -} - -fn main() -> Result<()> { - let cli = Cli::parse(); - - // Prepare reader - let mut f = File::open(&cli.file)?; - let metadata = read_metadata(&mut f)?; - - // Select columns which should be exported (Binary and Decimal are not supported) - let cols: Vec = metadata - .root_data_type() - .children() - .iter() - .enumerate() - // TODO: handle nested types - .filter(|(_, nc)| match nc.data_type().to_arrow_data_type() { - DataType::Binary => false, - DataType::Decimal128(_, _) | DataType::Decimal256(_, _) => { - matches!(cli.format, FileFormat::Csv) - } - _ => { - if let Some(cols) = &cli.columns { - cols.iter().any(|c| nc.name().eq(c)) - } else { - true - } - } - }) - .map(|(i, _)| i) - .collect(); - - let projection = ProjectionMask::roots(metadata.root_data_type(), cols); - let reader = ArrowReaderBuilder::try_new(f)? - .with_projection(projection) - .build(); - - // Prepare writer - let writer: Box = if let Some(output) = cli.output_file { - Box::new(File::create(output)?) 
- } else { - Box::new(io::stdout()) - }; - - let mut output_writer = match cli.format { - FileFormat::Json => { - OutputWriter::Json(json::WriterBuilder::new().build::<_, LineDelimited>(writer)) - } - _ => OutputWriter::Csv(csv::WriterBuilder::new().with_header(true).build(writer)), - }; - - // Convert data - let mut num_rows = cli.num_rows.unwrap_or(u64::MAX); - for mut batch in reader.flatten() { - // Restrict rows - if num_rows < batch.num_rows() as u64 { - batch = batch.slice(0, num_rows as usize); - } - - // Save - output_writer.write(&batch)?; - - // Have we reached limit on the number of rows? - if num_rows > batch.num_rows() as u64 { - num_rows -= batch.num_rows() as u64; - } else { - break; - } - } - - output_writer.finish()?; - - Ok(()) -} diff --git a/src/bin/orc-metadata.rs b/src/bin/orc-metadata.rs deleted file mode 100644 index 9c15ca8a..00000000 --- a/src/bin/orc-metadata.rs +++ /dev/null @@ -1,77 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::{error::Error, fs::File, path::PathBuf, sync::Arc}; - -use clap::Parser; -use orc_rust::{reader::metadata::read_metadata, stripe::Stripe}; - -#[derive(Parser)] -#[command(version, about, long_about = None)] -struct Cli { - /// ORC file path - file: PathBuf, - - /// Display data for all stripes - #[arg(short, long)] - stripes: bool, -} - -fn main() -> Result<(), Box> { - let cli = Cli::parse(); - - let mut f = File::open(cli.file)?; - let metadata = Arc::new(read_metadata(&mut f)?); - - // TODO: better way to handle this printing? - println!( - "compression: {}", - metadata - .compression() - .map(|c| c.to_string()) - .unwrap_or("None".to_string()) - ); - println!("file format version: {}", metadata.file_format_version()); - println!("number of rows: {}", metadata.number_of_rows()); - println!("number of stripes: {}", metadata.stripe_metadatas().len()); - - // TODO: nesting types indentation is messed up - println!("schema:\n{}", metadata.root_data_type()); - if cli.stripes { - println!("\n=== Stripes ==="); - for (i, stripe_metadata) in metadata.stripe_metadatas().iter().enumerate() { - let stripe = Stripe::new( - &mut f, - &metadata, - metadata.root_data_type(), - stripe_metadata, - )?; - println!("stripe index: {i}"); - println!("number of rows: {}", stripe.number_of_rows()); - println!( - "writer timezone: {}", - stripe - .writer_tz() - .map(|tz| tz.to_string()) - .unwrap_or("None".to_string()) - ); - println!(); - } - } - - Ok(()) -} diff --git a/src/bin/orc-stats.rs b/src/bin/orc-stats.rs deleted file mode 100644 index 1113a011..00000000 --- a/src/bin/orc-stats.rs +++ /dev/null @@ -1,149 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::{fs::File, path::PathBuf, sync::Arc}; - -use anyhow::Result; -use arrow::temporal_conversions::{date32_to_datetime, timestamp_ms_to_datetime}; -use clap::Parser; -use orc_rust::{reader::metadata::read_metadata, statistics::ColumnStatistics}; - -#[derive(Parser)] -#[command(name = "orc-stats")] -#[command(version, about = "Print column and stripe stats from the orc file", long_about = None)] -struct Cli { - /// Path to the orc file - file: PathBuf, -} - -fn print_column_stats(col_stats: &ColumnStatistics) { - if let Some(tstats) = col_stats.type_statistics() { - match tstats { - orc_rust::statistics::TypeStatistics::Integer { min, max, sum } => { - println!("* Data type Integer"); - println!("* Minimum: {}", min); - println!("* Maximum: {}", max); - if let Some(sum) = sum { - println!("* Sum: {}", sum); - } - } - orc_rust::statistics::TypeStatistics::Double { min, max, sum } => { - println!("* Data type Double"); - println!("* Minimum: {}", min); - println!("* Maximum: {}", max); - if let Some(sum) = sum { - println!("* Sum: {}", sum); - } - } - orc_rust::statistics::TypeStatistics::String { min, max, sum } => { - println!("* Data type String"); - println!("* Minimum: {}", min); - println!("* Maximum: {}", max); - println!("* Sum: {}", sum); - } - orc_rust::statistics::TypeStatistics::Bucket { true_count } => { - println!("* Data type Bucket"); - println!("* True count: {}", true_count); - } - orc_rust::statistics::TypeStatistics::Decimal { min, max, sum } => { - println!("* Data type Decimal"); - println!("* Minimum: {}", min); - println!("* Maximum: {}", max); - println!("* Sum: {}", sum); - } - orc_rust::statistics::TypeStatistics::Date { min, max } => { - println!("* Data type Date"); - if let Some(dt) = date32_to_datetime(*min) { - println!("* Minimum: {}", dt); - } - if let Some(dt) = date32_to_datetime(*max) { - println!("* Maximum: {}", dt); - } - } - orc_rust::statistics::TypeStatistics::Binary { sum } => { - println!("* Data type Binary"); - println!("* Sum: {}", sum); - } - orc_rust::statistics::TypeStatistics::Timestamp { - min, - max, - min_utc, - max_utc, - } => { - println!("* Data type Timestamp"); - println!("* Minimum: {}", min); - println!("* Maximum: {}", max); - if let Some(ts) = timestamp_ms_to_datetime(*min_utc) { - println!("* Minimum UTC: {}", ts); - } - if let Some(ts) = timestamp_ms_to_datetime(*max_utc) { - println!("* Maximum UTC: {}", ts); - } - } - orc_rust::statistics::TypeStatistics::Collection { - min_children, - max_children, - total_children, - } => { - println!("* Data type Collection"); - println!("* Minimum children: {}", min_children); - println!("* Maximum children: {}", max_children); - println!("* Total children: {}", total_children); - } - } - } - - println!("* Num values: {}", col_stats.number_of_values()); - println!("* Has nulls: {}", 
col_stats.has_null()); - println!(); -} - -fn main() -> Result<()> { - let cli = Cli::parse(); - - let mut f = File::open(&cli.file)?; - let metadata = Arc::new(read_metadata(&mut f)?); - - println!("# Column stats"); - println!( - "File {:?} has {} columns", - cli.file, - metadata.column_file_statistics().len() - ); - println!(); - for (idx, col_stats) in metadata.column_file_statistics().iter().enumerate() { - println!("## Column {idx}"); - print_column_stats(col_stats); - } - - println!("# Stripe stats"); - println!( - "File {:?} has {} stripes", - cli.file, - metadata.stripe_metadatas().len() - ); - println!(); - for (idm, sm) in metadata.stripe_metadatas().iter().enumerate() { - println!("----- Stripe {idm} -----\n"); - for (idc, col_stats) in sm.column_statistics().iter().enumerate() { - println!("## Column {idc}"); - print_column_stats(col_stats); - } - } - - Ok(()) -} diff --git a/src/column.rs b/src/column.rs deleted file mode 100644 index aaacb31e..00000000 --- a/src/column.rs +++ /dev/null @@ -1,141 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::sync::Arc; - -use bytes::Bytes; -use snafu::ResultExt; - -use crate::error::{IoSnafu, Result}; -use crate::proto::{ColumnEncoding, StripeFooter}; -use crate::reader::ChunkReader; -use crate::schema::DataType; - -#[derive(Clone, Debug)] -pub struct Column { - footer: Arc, - name: String, - data_type: DataType, -} - -impl Column { - pub fn new(name: &str, data_type: &DataType, footer: &Arc) -> Self { - Self { - footer: footer.clone(), - data_type: data_type.clone(), - name: name.to_string(), - } - } - - pub fn dictionary_size(&self) -> usize { - let column = self.data_type.column_index(); - self.footer.columns[column] - .dictionary_size - .unwrap_or_default() as usize - } - - pub fn encoding(&self) -> ColumnEncoding { - let column = self.data_type.column_index(); - self.footer.columns[column].clone() - } - - pub fn data_type(&self) -> &DataType { - &self.data_type - } - - pub fn name(&self) -> &str { - &self.name - } - - pub fn column_id(&self) -> u32 { - self.data_type.column_index() as u32 - } - - pub fn children(&self) -> Vec { - match &self.data_type { - DataType::Boolean { .. } - | DataType::Byte { .. } - | DataType::Short { .. } - | DataType::Int { .. } - | DataType::Long { .. } - | DataType::Float { .. } - | DataType::Double { .. } - | DataType::String { .. } - | DataType::Varchar { .. } - | DataType::Char { .. } - | DataType::Binary { .. } - | DataType::Decimal { .. } - | DataType::Timestamp { .. } - | DataType::TimestampWithLocalTimezone { .. } - | DataType::Date { .. } => vec![], - DataType::Struct { children, .. 
} => children - .iter() - .map(|col| Column { - footer: self.footer.clone(), - name: col.name().to_string(), - data_type: col.data_type().clone(), - }) - .collect(), - DataType::List { child, .. } => { - vec![Column { - footer: self.footer.clone(), - name: "item".to_string(), - data_type: *child.clone(), - }] - } - DataType::Map { key, value, .. } => { - vec![ - Column { - footer: self.footer.clone(), - name: "key".to_string(), - data_type: *key.clone(), - }, - Column { - footer: self.footer.clone(), - name: "value".to_string(), - data_type: *value.clone(), - }, - ] - } - DataType::Union { variants, .. } => { - // TODO: might need corrections - variants - .iter() - .enumerate() - .map(|(index, data_type)| Column { - footer: self.footer.clone(), - name: format!("{index}"), - data_type: data_type.clone(), - }) - .collect() - } - } - } - - pub fn read_stream(reader: &mut R, start: u64, length: u64) -> Result { - reader.get_bytes(start, length).context(IoSnafu) - } - - #[cfg(feature = "async")] - pub async fn read_stream_async( - reader: &mut R, - start: u64, - length: u64, - ) -> Result { - reader.get_bytes(start, length).await.context(IoSnafu) - } -} diff --git a/src/compression.rs b/src/compression.rs deleted file mode 100644 index ffd60382..00000000 --- a/src/compression.rs +++ /dev/null @@ -1,371 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -// Modified from https://github.com/DataEngineeringLabs/orc-format/blob/416490db0214fc51d53289253c0ee91f7fc9bc17/src/read/decompress/mod.rs -//! Related code for handling decompression of ORC files. - -use std::io::Read; - -use bytes::{Bytes, BytesMut}; -use fallible_streaming_iterator::FallibleStreamingIterator; -use snafu::ResultExt; - -use crate::error::{self, OrcError, Result}; -use crate::proto::{self, CompressionKind}; - -// Spec states default is 256K -const DEFAULT_COMPRESSION_BLOCK_SIZE: u64 = 256 * 1024; - -#[derive(Clone, Copy, Debug)] -pub struct Compression { - compression_type: CompressionType, - /// No compression chunk will decompress to larger than this size. - /// Use to size the scratch buffer appropriately. 
- max_decompressed_block_size: usize, -} - -impl std::fmt::Display for Compression { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "{} ({} byte max block size)", - self.compression_type, self.max_decompressed_block_size - ) - } -} - -impl Compression { - pub fn compression_type(&self) -> CompressionType { - self.compression_type - } - - pub(crate) fn from_proto( - kind: proto::CompressionKind, - compression_block_size: Option, - ) -> Option { - let max_decompressed_block_size = - compression_block_size.unwrap_or(DEFAULT_COMPRESSION_BLOCK_SIZE) as usize; - match kind { - CompressionKind::None => None, - CompressionKind::Zlib => Some(Self { - compression_type: CompressionType::Zlib, - max_decompressed_block_size, - }), - CompressionKind::Snappy => Some(Self { - compression_type: CompressionType::Snappy, - max_decompressed_block_size, - }), - CompressionKind::Lzo => Some(Self { - compression_type: CompressionType::Lzo, - max_decompressed_block_size, - }), - CompressionKind::Lz4 => Some(Self { - compression_type: CompressionType::Lz4, - max_decompressed_block_size, - }), - CompressionKind::Zstd => Some(Self { - compression_type: CompressionType::Zstd, - max_decompressed_block_size, - }), - } - } -} - -#[derive(Clone, Copy, Debug)] -pub enum CompressionType { - Zlib, - Snappy, - Lzo, - Lz4, - Zstd, -} - -impl std::fmt::Display for CompressionType { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{:?}", self) - } -} - -/// Indicates length of block and whether it's compressed or not. -#[derive(Debug, PartialEq, Eq)] -enum CompressionHeader { - Original(u32), - Compressed(u32), -} - -/// ORC files are compressed in blocks, with a 3 byte header at the start -/// of these blocks indicating the length of the block and whether it's -/// compressed or not. 
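To make the header layout above concrete: the three bytes form a little-endian 24-bit value whose lowest bit flags an uncompressed ("original") block and whose upper 23 bits give the block length. A minimal sketch mirroring decode_header below, reusing the byte sequences from this file's unit tests:

// Sketch of the 3-byte compression block header (test vectors from this file).
fn header(bytes: [u8; 3]) -> (bool, u32) {
    let v = u32::from_le_bytes([bytes[0], bytes[1], bytes[2], 0]);
    // Lowest bit: is_original flag; remaining 23 bits: block length.
    (v & 1 == 1, v >> 1)
}

fn main() {
    assert_eq!(header([0x0b, 0x00, 0x00]), (true, 5)); // original block, 5 bytes
    assert_eq!(header([0x40, 0x0d, 0x03]), (false, 100_000)); // compressed, 100_000 bytes
}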
-fn decode_header(bytes: [u8; 3]) -> CompressionHeader { - let bytes = [bytes[0], bytes[1], bytes[2], 0]; - let length_and_flag = u32::from_le_bytes(bytes); - let is_original = length_and_flag & 1 == 1; - let length = length_and_flag >> 1; - if is_original { - CompressionHeader::Original(length) - } else { - CompressionHeader::Compressed(length) - } -} - -pub(crate) trait DecompressorVariant: Send { - fn decompress_block(&self, compressed_bytes: &[u8], scratch: &mut Vec) -> Result<()>; -} - -#[derive(Debug, Clone, Copy)] -struct Zlib; -#[derive(Debug, Clone, Copy)] -struct Zstd; -#[derive(Debug, Clone, Copy)] -struct Snappy; -#[derive(Debug, Clone, Copy)] -struct Lzo; -#[derive(Debug, Clone, Copy)] -struct Lz4 { - max_decompressed_block_size: usize, -} - -impl DecompressorVariant for Zlib { - fn decompress_block(&self, compressed_bytes: &[u8], scratch: &mut Vec) -> Result<()> { - let mut gz = flate2::read::DeflateDecoder::new(compressed_bytes); - scratch.clear(); - gz.read_to_end(scratch).context(error::IoSnafu)?; - Ok(()) - } -} - -impl DecompressorVariant for Zstd { - fn decompress_block(&self, compressed_bytes: &[u8], scratch: &mut Vec) -> Result<()> { - let mut reader = - zstd::Decoder::new(compressed_bytes).context(error::BuildZstdDecoderSnafu)?; - scratch.clear(); - reader.read_to_end(scratch).context(error::IoSnafu)?; - Ok(()) - } -} - -impl DecompressorVariant for Snappy { - fn decompress_block(&self, compressed_bytes: &[u8], scratch: &mut Vec) -> Result<()> { - let len = - snap::raw::decompress_len(compressed_bytes).context(error::BuildSnappyDecoderSnafu)?; - scratch.resize(len, 0); - let mut decoder = snap::raw::Decoder::new(); - decoder - .decompress(compressed_bytes, scratch) - .context(error::BuildSnappyDecoderSnafu)?; - Ok(()) - } -} - -impl DecompressorVariant for Lzo { - fn decompress_block(&self, compressed_bytes: &[u8], scratch: &mut Vec) -> Result<()> { - let decompressed = lzokay_native::decompress_all(compressed_bytes, None) - .context(error::BuildLzoDecoderSnafu)?; - // TODO: better way to utilize scratch here - scratch.clear(); - scratch.extend(decompressed); - Ok(()) - } -} - -impl DecompressorVariant for Lz4 { - fn decompress_block(&self, compressed_bytes: &[u8], scratch: &mut Vec) -> Result<()> { - let decompressed = - lz4_flex::block::decompress(compressed_bytes, self.max_decompressed_block_size) - .context(error::BuildLz4DecoderSnafu)?; - // TODO: better way to utilize scratch here - scratch.clear(); - scratch.extend(decompressed); - Ok(()) - } -} - -// TODO: push this earlier so we don't check this variant each time -fn get_decompressor_variant( - Compression { - compression_type, - max_decompressed_block_size, - }: Compression, -) -> Box { - match compression_type { - CompressionType::Zlib => Box::new(Zlib), - CompressionType::Snappy => Box::new(Snappy), - CompressionType::Lzo => Box::new(Lzo), - CompressionType::Lz4 => Box::new(Lz4 { - max_decompressed_block_size, - }), - CompressionType::Zstd => Box::new(Zstd), - } -} - -enum State { - Original(Bytes), - Compressed(Vec), -} - -struct DecompressorIter { - stream: BytesMut, - current: Option, // when we have compression but the value is original - compression: Option>, - scratch: Vec, -} - -impl DecompressorIter { - fn new(stream: Bytes, compression: Option, scratch: Vec) -> Self { - Self { - stream: BytesMut::from(stream.as_ref()), - current: None, - compression: compression.map(get_decompressor_variant), - scratch, - } - } -} - -impl FallibleStreamingIterator for DecompressorIter { - type Item = [u8]; - - 
type Error = OrcError; - - #[inline] - fn advance(&mut self) -> Result<(), Self::Error> { - if self.stream.is_empty() { - self.current = None; - return Ok(()); - } - - match &self.compression { - Some(compression) => { - // TODO: take scratch from current State::Compressed for re-use - let header = self.stream.split_to(3); - let header = [header[0], header[1], header[2]]; - match decode_header(header) { - CompressionHeader::Original(length) => { - let original = self.stream.split_to(length as usize); - self.current = Some(State::Original(original.into())); - } - CompressionHeader::Compressed(length) => { - let compressed = self.stream.split_to(length as usize); - compression.decompress_block(&compressed, &mut self.scratch)?; - self.current = Some(State::Compressed(std::mem::take(&mut self.scratch))); - } - }; - Ok(()) - } - None => { - // TODO: take scratch from current State::Compressed for re-use - self.current = Some(State::Original(self.stream.clone().into())); - self.stream.clear(); - Ok(()) - } - } - } - - #[inline] - fn get(&self) -> Option<&Self::Item> { - self.current.as_ref().map(|x| match x { - State::Original(x) => x.as_ref(), - State::Compressed(x) => x.as_ref(), - }) - } -} - -/// A [`Read`]er fulfilling the ORC specification of reading compressed data. -pub(crate) struct Decompressor { - decompressor: DecompressorIter, - offset: usize, - is_first: bool, -} - -impl Decompressor { - /// Creates a new [`Decompressor`] that will use `scratch` as a temporary region. - pub fn new(stream: Bytes, compression: Option<Compression>, scratch: Vec<u8>) -> Self { - Self { - decompressor: DecompressorIter::new(stream, compression, scratch), - offset: 0, - is_first: true, - } - } - - // TODO: remove need for this upstream - pub fn empty() -> Self { - Self { - decompressor: DecompressorIter::new(Bytes::new(), None, vec![]), - offset: 0, - is_first: true, - } - } -} - -impl std::io::Read for Decompressor { - fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> { - if self.is_first { - self.is_first = false; - self.decompressor.advance().unwrap(); - } - let current = self.decompressor.get(); - let current = if let Some(current) = current { - if current.len() == self.offset { - self.decompressor.advance().unwrap(); - self.offset = 0; - let current = self.decompressor.get(); - if let Some(current) = current { - current - } else { - return Ok(0); - } - } else { - &current[self.offset..]
- } - } else { - return Ok(0); - }; - - if current.len() >= buf.len() { - buf.copy_from_slice(&current[..buf.len()]); - self.offset += buf.len(); - Ok(buf.len()) - } else { - buf[..current.len()].copy_from_slice(current); - self.offset += current.len(); - Ok(current.len()) - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn decode_uncompressed() { - // 5 uncompressed = [0x0b, 0x00, 0x00] = [0b1011, 0, 0] - let bytes = [0b1011, 0, 0]; - - let expected = CompressionHeader::Original(5); - let actual = decode_header(bytes); - assert_eq!(expected, actual); - } - - #[test] - fn decode_compressed() { - // 100_000 compressed = [0x40, 0x0d, 0x03] = [0b01000000, 0b00001101, 0b00000011] - let bytes = [0b0100_0000, 0b0000_1101, 0b0000_0011]; - let expected = CompressionHeader::Compressed(100_000); - let actual = decode_header(bytes); - assert_eq!(expected, actual); - } -} diff --git a/src/encoding/boolean.rs b/src/encoding/boolean.rs deleted file mode 100644 index f86e585a..00000000 --- a/src/encoding/boolean.rs +++ /dev/null @@ -1,170 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::io::Read; - -use arrow::{ - array::BooleanBufferBuilder, - buffer::{BooleanBuffer, NullBuffer}, -}; -use bytes::Bytes; - -use crate::{error::Result, memory::EstimateMemory}; - -use super::{ - byte::{ByteRleDecoder, ByteRleEncoder}, - PrimitiveValueDecoder, PrimitiveValueEncoder, -}; - -pub struct BooleanDecoder<R: Read> { - decoder: ByteRleDecoder<R>, - data: u8, - bits_in_data: usize, -} - -impl<R: Read> BooleanDecoder<R> { - pub fn new(reader: R) -> Self { - Self { - decoder: ByteRleDecoder::new(reader), - bits_in_data: 0, - data: 0, - } - } - - pub fn value(&mut self) -> bool { - let value = (self.data & 0x80) != 0; - self.data <<= 1; - self.bits_in_data -= 1; - - value - } -} - -impl<R: Read> PrimitiveValueDecoder<bool> for BooleanDecoder<R> { - // TODO: can probably implement this better - fn decode(&mut self, out: &mut [bool]) -> Result<()> { - for x in out.iter_mut() { - // read more data if necessary - if self.bits_in_data == 0 { - let mut data = [0]; - self.decoder.decode(&mut data)?; - self.data = data[0] as u8; - self.bits_in_data = 8; - } - *x = self.value(); - } - Ok(()) - } -} - -/// ORC encodes validity starting from MSB, whilst Arrow encodes it -/// from LSB. After bytes are filled with the present bits, they are -/// further encoded via Byte RLE. -pub struct BooleanEncoder { - // TODO: can we refactor to not need two separate buffers?
- byte_encoder: ByteRleEncoder, - builder: BooleanBufferBuilder, -} - -impl EstimateMemory for BooleanEncoder { - fn estimate_memory_size(&self) -> usize { - self.builder.len() / 8 - } -} - -impl BooleanEncoder { - pub fn new() -> Self { - Self { - byte_encoder: ByteRleEncoder::new(), - builder: BooleanBufferBuilder::new(8), - } - } - - pub fn extend(&mut self, null_buffer: &NullBuffer) { - let bb = null_buffer.inner(); - self.extend_bb(bb); - } - - pub fn extend_bb(&mut self, bb: &BooleanBuffer) { - self.builder.append_buffer(bb); - } - - /// Extend with n true bits. - pub fn extend_present(&mut self, n: usize) { - self.builder.append_n(n, true); - } - - pub fn extend_boolean(&mut self, b: bool) { - self.builder.append(b); - } - - /// Produce ORC present stream bytes and reset internal builder. - pub fn finish(&mut self) -> Bytes { - // TODO: don't throw away allocation? - let bb = self.builder.finish(); - // We use BooleanBufferBuilder so offset is 0 - let bytes = bb.values(); - // Reverse bits as ORC stores from MSB - let bytes = bytes.iter().map(|b| b.reverse_bits()).collect::>(); - for &b in bytes.as_slice() { - self.byte_encoder.write_one(b as i8); - } - self.byte_encoder.take_inner() - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn basic() { - let expected = vec![false; 800]; - let data = [0x61u8, 0x00]; - let data = &mut data.as_ref(); - let mut decoder = BooleanDecoder::new(data); - let mut actual = vec![true; expected.len()]; - decoder.decode(&mut actual).unwrap(); - assert_eq!(actual, expected) - } - - #[test] - fn literals() { - let expected = vec![ - false, true, false, false, false, true, false, false, // 0b01000100 - false, true, false, false, false, true, false, true, // 0b01000101 - ]; - let data = [0xfeu8, 0b01000100, 0b01000101]; - let data = &mut data.as_ref(); - let mut decoder = BooleanDecoder::new(data); - let mut actual = vec![true; expected.len()]; - decoder.decode(&mut actual).unwrap(); - assert_eq!(actual, expected) - } - - #[test] - fn another() { - // "For example, the byte sequence [0xff, 0x80] would be one true followed by seven false values." - let expected = vec![true, false, false, false, false, false, false, false]; - let data = [0xff, 0x80]; - let data = &mut data.as_ref(); - let mut decoder = BooleanDecoder::new(data); - let mut actual = vec![true; expected.len()]; - decoder.decode(&mut actual).unwrap(); - assert_eq!(actual, expected) - } -} diff --git a/src/encoding/byte.rs b/src/encoding/byte.rs deleted file mode 100644 index d2ad1999..00000000 --- a/src/encoding/byte.rs +++ /dev/null @@ -1,340 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
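Backtracking briefly to BooleanEncoder::finish above: Arrow packs validity bits LSB-first while ORC expects them MSB-first, which is why each byte is bit-reversed before Byte RLE. A small illustration with plain u8 values (not crate API):

// Arrow packs [true, true, false, false, false, false, false, false]
// LSB-first into 0b0000_0011; ORC wants the first value in the most
// significant bit, i.e. 0b1100_0000.
fn main() {
    let arrow_byte: u8 = 0b0000_0011;
    assert_eq!(arrow_byte.reverse_bits(), 0b1100_0000u8);
}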
- -use bytemuck::must_cast_slice; -use bytes::{BufMut, BytesMut}; -use snafu::ResultExt; - -use crate::{ - error::{IoSnafu, Result}, - memory::EstimateMemory, -}; -use std::io::Read; - -use super::{rle::GenericRle, util::read_u8, PrimitiveValueEncoder}; - -const MAX_LITERAL_LENGTH: usize = 128; -const MIN_REPEAT_LENGTH: usize = 3; -const MAX_REPEAT_LENGTH: usize = 130; - -pub struct ByteRleEncoder { - writer: BytesMut, - /// Literal values to encode. - literals: [u8; MAX_LITERAL_LENGTH], - /// Represents the number of elements currently in `literals` if Literals, - /// otherwise represents the length of the Run. - num_literals: usize, - /// Tracks if current Literal sequence will turn into a Run sequence due to - /// repeated values at the end of the value sequence. - tail_run_length: usize, - /// If in Run sequence or not, and keeps the corresponding value. - run_value: Option, -} - -impl ByteRleEncoder { - /// Incrementally encode bytes using Run Length Encoding, where the subencodings are: - /// - Run: at least 3 repeated values in sequence (up to `MAX_REPEAT_LENGTH`) - /// - Literals: disparate values (up to `MAX_LITERAL_LENGTH` length) - /// - /// How the relevant encodings are chosen: - /// - Keep of track of values as they come, starting off assuming Literal sequence - /// - Keep track of latest value, to see if we are encountering a sequence of repeated - /// values (Run sequence) - /// - If this tail end exceeds the required minimum length, flush the current Literal - /// sequence (or switch to Run if entire current sequence is the repeated value) - /// - Whether in Literal or Run mode, keep buffering values and flushing when max length - /// reached or encoding is broken (e.g. non-repeated value found in Run mode) - fn process_value(&mut self, value: u8) { - // Adapted from https://github.com/apache/orc/blob/main/java/core/src/java/org/apache/orc/impl/RunLengthByteWriter.java - if self.num_literals == 0 { - // Start off in Literal mode - self.run_value = None; - self.literals[0] = value; - self.num_literals = 1; - self.tail_run_length = 1; - } else if let Some(run_value) = self.run_value { - // Run mode - - if value == run_value { - // Continue buffering for Run sequence, flushing if reaching max length - self.num_literals += 1; - if self.num_literals == MAX_REPEAT_LENGTH { - write_run(&mut self.writer, run_value, MAX_REPEAT_LENGTH); - self.clear_state(); - } - } else { - // Run is broken, flush then start again in Literal mode - write_run(&mut self.writer, run_value, self.num_literals); - self.run_value = None; - self.literals[0] = value; - self.num_literals = 1; - self.tail_run_length = 1; - } - } else { - // Literal mode - - // tail_run_length tracks length of repetition of last value - if value == self.literals[self.num_literals - 1] { - self.tail_run_length += 1; - } else { - self.tail_run_length = 1; - } - - if self.tail_run_length == MIN_REPEAT_LENGTH { - // When the tail end of the current sequence is enough for a Run sequence - - if self.num_literals + 1 == MIN_REPEAT_LENGTH { - // If current values are enough for a Run sequence, switch to Run encoding - self.run_value = Some(value); - self.num_literals += 1; - } else { - // Flush the current Literal sequence, then switch to Run encoding - // We don't flush the tail end which is a Run sequence - let len = self.num_literals - (MIN_REPEAT_LENGTH - 1); - let literals = &self.literals[..len]; - write_literals(&mut self.writer, literals); - self.run_value = Some(value); - self.num_literals = MIN_REPEAT_LENGTH; - } - } else { 
- // Continue buffering for Literal sequence, flushing if reaching max length - self.literals[self.num_literals] = value; - self.num_literals += 1; - if self.num_literals == MAX_LITERAL_LENGTH { - // Entire literals is filled, pass in as is - write_literals(&mut self.writer, &self.literals); - self.clear_state(); - } - } - } - } - - fn clear_state(&mut self) { - self.run_value = None; - self.tail_run_length = 0; - self.num_literals = 0; - } - - /// Flush any buffered values to writer in appropriate sequence. - fn flush(&mut self) { - if self.num_literals != 0 { - if let Some(value) = self.run_value { - write_run(&mut self.writer, value, self.num_literals); - } else { - let literals = &self.literals[..self.num_literals]; - write_literals(&mut self.writer, literals); - } - self.clear_state(); - } - } -} - -impl EstimateMemory for ByteRleEncoder { - fn estimate_memory_size(&self) -> usize { - self.writer.len() + self.num_literals - } -} - -/// i8 to match with Arrow Int8 type. -impl PrimitiveValueEncoder for ByteRleEncoder { - fn new() -> Self { - Self { - writer: BytesMut::new(), - literals: [0; MAX_LITERAL_LENGTH], - num_literals: 0, - tail_run_length: 0, - run_value: None, - } - } - - fn write_one(&mut self, value: i8) { - self.process_value(value as u8); - } - - fn take_inner(&mut self) -> bytes::Bytes { - self.flush(); - std::mem::take(&mut self.writer).into() - } -} - -fn write_run(writer: &mut BytesMut, value: u8, run_length: usize) { - debug_assert!( - (MIN_REPEAT_LENGTH..=MAX_REPEAT_LENGTH).contains(&run_length), - "Byte RLE Run sequence must be in range 3..=130" - ); - // [3, 130] to [0, 127] - let header = run_length - MIN_REPEAT_LENGTH; - writer.put_u8(header as u8); - writer.put_u8(value); -} - -fn write_literals(writer: &mut BytesMut, literals: &[u8]) { - debug_assert!( - (1..=MAX_LITERAL_LENGTH).contains(&literals.len()), - "Byte RLE Literal sequence must be in range 1..=128" - ); - // [1, 128] to [-1, -128], then writing as a byte - let header = -(literals.len() as i32); - writer.put_u8(header as u8); - writer.put_slice(literals); -} - -pub struct ByteRleDecoder { - reader: R, - /// Values that have been decoded but not yet emitted. 
- leftovers: Vec, - /// Index into leftovers to make it act like a queue; indicates the - /// next element available to read - index: usize, -} - -impl ByteRleDecoder { - pub fn new(reader: R) -> Self { - Self { - reader, - leftovers: Vec::with_capacity(MAX_REPEAT_LENGTH), - index: 0, - } - } -} - -impl GenericRle for ByteRleDecoder { - fn advance(&mut self, n: usize) { - self.index += n - } - - fn available(&self) -> &[i8] { - let bytes = &self.leftovers[self.index..]; - must_cast_slice(bytes) - } - - fn decode_batch(&mut self) -> Result<()> { - self.index = 0; - self.leftovers.clear(); - - let header = read_u8(&mut self.reader)?; - if header < 0x80 { - // Run of repeated value - let length = header as usize + MIN_REPEAT_LENGTH; - let value = read_u8(&mut self.reader)?; - self.leftovers.extend(std::iter::repeat(value).take(length)); - } else { - // List of values - let length = 0x100 - header as usize; - self.leftovers.resize(length, 0); - self.reader - .read_exact(&mut self.leftovers) - .context(IoSnafu)?; - } - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use std::io::Cursor; - - use crate::encoding::PrimitiveValueDecoder; - - use super::*; - - use proptest::prelude::*; - - // TODO: have tests varying the out buffer, to ensure decode() is called - // multiple times - - fn test_helper(data: &[u8], expected: &[i8]) { - let mut reader = ByteRleDecoder::new(Cursor::new(data)); - let mut actual = vec![0; expected.len()]; - reader.decode(&mut actual).unwrap(); - assert_eq!(actual, expected); - } - - #[test] - fn reader_test() { - let data = [0x61u8, 0x00]; - let expected = [0; 100]; - test_helper(&data, &expected); - - let data = [0x01, 0x01]; - let expected = [1; 4]; - test_helper(&data, &expected); - - let data = [0xfe, 0x44, 0x45]; - let expected = [0x44, 0x45]; - test_helper(&data, &expected); - } - - fn roundtrip_byte_rle_helper(values: &[i8]) -> Result> { - let mut writer = ByteRleEncoder::new(); - writer.write_slice(values); - writer.flush(); - - let buf = writer.take_inner(); - let mut cursor = Cursor::new(&buf); - let mut reader = ByteRleDecoder::new(&mut cursor); - let mut actual = vec![0; values.len()]; - reader.decode(&mut actual)?; - Ok(actual) - } - - #[derive(Debug, Clone)] - enum ByteSequence { - Run(i8, usize), - Literals(Vec), - } - - fn byte_sequence_strategy() -> impl Strategy { - // We limit the max length of the sequences to 140 to try get more interleaving - prop_oneof![ - (any::(), 1..140_usize).prop_map(|(a, b)| ByteSequence::Run(a, b)), - prop::collection::vec(any::(), 1..140).prop_map(ByteSequence::Literals) - ] - } - - fn generate_bytes_from_sequences(sequences: Vec) -> Vec { - let mut bytes = vec![]; - for sequence in sequences { - match sequence { - ByteSequence::Run(value, length) => { - bytes.extend(std::iter::repeat(value).take(length)) - } - ByteSequence::Literals(literals) => bytes.extend(literals), - } - } - bytes - } - - proptest! 
{ - #[test] - fn roundtrip_byte_rle_pure_random(values: Vec<i8>) { - // Biased towards literal sequences due to purely random values - let out = roundtrip_byte_rle_helper(&values).unwrap(); - prop_assert_eq!(out, values); - } - - #[test] - fn roundtrip_byte_rle_biased( - sequences in prop::collection::vec(byte_sequence_strategy(), 1..200) - ) { - // Intentionally introduce run sequences to not be entirely random literals - let values = generate_bytes_from_sequences(sequences); - let out = roundtrip_byte_rle_helper(&values).unwrap(); - prop_assert_eq!(out, values); - } - } -} diff --git a/src/encoding/decimal.rs b/src/encoding/decimal.rs deleted file mode 100644 index f722cf0a..00000000 --- a/src/encoding/decimal.rs +++ /dev/null @@ -1,45 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::io::Read; - -use crate::error::Result; - -use super::{ - integer::{read_varint_zigzagged, SignedEncoding}, - PrimitiveValueDecoder, -}; - -/// Read stream of zigzag encoded varints as i128 (unbounded). -pub struct UnboundedVarintStreamDecoder<R> { - reader: R, -} - -impl<R: Read> UnboundedVarintStreamDecoder<R> { - pub fn new(reader: R) -> Self { - Self { reader } - } -} - -impl<R: Read> PrimitiveValueDecoder<i128> for UnboundedVarintStreamDecoder<R> { - fn decode(&mut self, out: &mut [i128]) -> Result<()> { - for x in out.iter_mut() { - *x = read_varint_zigzagged::<i128, _, SignedEncoding>(&mut self.reader)?; - } - Ok(()) - } -} diff --git a/src/encoding/float.rs b/src/encoding/float.rs deleted file mode 100644 index 5b9fa7ec..00000000 --- a/src/encoding/float.rs +++ /dev/null @@ -1,179 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::marker::PhantomData; - -use bytemuck::{must_cast_slice, must_cast_slice_mut}; -use bytes::{Bytes, BytesMut}; -use snafu::ResultExt; - -use crate::{ - error::{IoSnafu, Result}, - memory::EstimateMemory, -}; - -use super::{PrimitiveValueDecoder, PrimitiveValueEncoder}; - -/// Collect all the required traits we need on floats.
-pub trait Float: - num::Float + std::fmt::Debug + bytemuck::NoUninit + bytemuck::AnyBitPattern -{ -} -impl Float for f32 {} -impl Float for f64 {} - -pub struct FloatDecoder { - reader: R, - phantom: std::marker::PhantomData, -} - -impl FloatDecoder { - pub fn new(reader: R) -> Self { - Self { - reader, - phantom: Default::default(), - } - } -} - -impl PrimitiveValueDecoder for FloatDecoder { - fn decode(&mut self, out: &mut [F]) -> Result<()> { - let bytes = must_cast_slice_mut::(out); - self.reader.read_exact(bytes).context(IoSnafu)?; - Ok(()) - } -} - -/// No special run encoding for floats/doubles, they are stored as their IEEE 754 floating -/// point bit layout. This encoder simply copies incoming floats/doubles to its internal -/// byte buffer. -pub struct FloatEncoder { - data: BytesMut, - _phantom: PhantomData, -} - -impl EstimateMemory for FloatEncoder { - fn estimate_memory_size(&self) -> usize { - self.data.len() - } -} - -impl PrimitiveValueEncoder for FloatEncoder { - fn new() -> Self { - Self { - data: BytesMut::new(), - _phantom: Default::default(), - } - } - - fn write_one(&mut self, value: F) { - self.write_slice(&[value]); - } - - fn write_slice(&mut self, values: &[F]) { - let bytes = must_cast_slice::(values); - self.data.extend_from_slice(bytes); - } - - fn take_inner(&mut self) -> Bytes { - std::mem::take(&mut self.data).into() - } -} - -#[cfg(test)] -mod tests { - use std::f32::consts as f32c; - use std::f64::consts as f64c; - use std::io::Cursor; - - use proptest::prelude::*; - - use super::*; - - fn roundtrip_helper(input: &[F]) -> Result> { - let mut encoder = FloatEncoder::::new(); - encoder.write_slice(input); - let bytes = encoder.take_inner(); - let bytes = Cursor::new(bytes); - - let mut iter = FloatDecoder::::new(bytes); - let mut actual = vec![F::zero(); input.len()]; - iter.decode(&mut actual)?; - - Ok(actual) - } - - fn assert_roundtrip(input: Vec) { - let actual = roundtrip_helper(&input).unwrap(); - assert_eq!(input, actual); - } - - proptest! { - #[test] - fn roundtrip_f32(values: Vec) { - let out = roundtrip_helper(&values)?; - prop_assert_eq!(out, values); - } - - #[test] - fn roundtrip_f64(values: Vec) { - let out = roundtrip_helper(&values)?; - prop_assert_eq!(out, values); - } - } - - #[test] - fn test_float_edge_cases() { - assert_roundtrip::(vec![]); - assert_roundtrip::(vec![]); - - assert_roundtrip(vec![f32c::PI]); - assert_roundtrip(vec![f64c::PI]); - - let actual = roundtrip_helper(&[f32::NAN]).unwrap(); - assert!(actual[0].is_nan()); - let actual = roundtrip_helper(&[f64::NAN]).unwrap(); - assert!(actual[0].is_nan()); - } - - #[test] - fn test_float_many() { - assert_roundtrip(vec![ - f32::NEG_INFINITY, - f32::MIN, - -1.0, - -0.0, - 0.0, - 1.0, - f32c::SQRT_2, - f32::MAX, - f32::INFINITY, - ]); - - assert_roundtrip(vec![ - f64::NEG_INFINITY, - f64::MIN, - -1.0, - -0.0, - 0.0, - 1.0, - f64c::SQRT_2, - f64::MAX, - f64::INFINITY, - ]); - } -} diff --git a/src/encoding/integer/mod.rs b/src/encoding/integer/mod.rs deleted file mode 100644 index f652d4e9..00000000 --- a/src/encoding/integer/mod.rs +++ /dev/null @@ -1,327 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Run length encoding & decoding for integers - -use std::{ - fmt, - io::Read, - ops::{BitOrAssign, ShlAssign}, -}; - -use num::{traits::CheckedShl, PrimInt, Signed}; -use rle_v1::RleV1Decoder; -use rle_v2::RleV2Decoder; -use snafu::ResultExt; -use util::{ - get_closest_aligned_bit_width, signed_msb_decode, signed_zigzag_decode, signed_zigzag_encode, -}; - -use crate::{ - column::Column, - error::{InvalidColumnEncodingSnafu, IoSnafu, Result}, - proto::column_encoding::Kind as ProtoColumnKind, -}; - -use super::PrimitiveValueDecoder; - -pub mod rle_v1; -pub mod rle_v2; -mod util; - -// TODO: consider having a separate varint.rs -pub use util::read_varint_zigzagged; - -pub fn get_unsigned_rle_reader( - column: &Column, - reader: R, -) -> Box + Send> { - match column.encoding().kind() { - ProtoColumnKind::Direct | ProtoColumnKind::Dictionary => { - Box::new(RleV1Decoder::::new(reader)) - } - ProtoColumnKind::DirectV2 | ProtoColumnKind::DictionaryV2 => { - Box::new(RleV2Decoder::::new(reader)) - } - } -} - -pub fn get_rle_reader( - column: &Column, - reader: R, -) -> Result + Send>> { - match column.encoding().kind() { - ProtoColumnKind::Direct => Ok(Box::new(RleV1Decoder::::new(reader))), - ProtoColumnKind::DirectV2 => { - Ok(Box::new(RleV2Decoder::::new(reader))) - } - k => InvalidColumnEncodingSnafu { - name: column.name(), - encoding: k, - } - .fail(), - } -} - -pub trait EncodingSign: Send + 'static { - // TODO: have separate type/trait to represent Zigzag encoded NInt? - fn zigzag_decode(v: N) -> N; - fn zigzag_encode(v: N) -> N; - - fn decode_signed_msb(v: N, encoded_byte_size: usize) -> N; -} - -pub struct SignedEncoding; - -impl EncodingSign for SignedEncoding { - #[inline] - fn zigzag_decode(v: N) -> N { - signed_zigzag_decode(v) - } - - #[inline] - fn zigzag_encode(v: N) -> N { - signed_zigzag_encode(v) - } - - #[inline] - fn decode_signed_msb(v: N, encoded_byte_size: usize) -> N { - signed_msb_decode(v, encoded_byte_size) - } -} - -pub struct UnsignedEncoding; - -impl EncodingSign for UnsignedEncoding { - #[inline] - fn zigzag_decode(v: N) -> N { - v - } - - #[inline] - fn zigzag_encode(v: N) -> N { - v - } - - #[inline] - fn decode_signed_msb(v: N, _encoded_byte_size: usize) -> N { - v - } -} - -pub trait VarintSerde: PrimInt + CheckedShl + BitOrAssign + Signed { - const BYTE_SIZE: usize; - - /// Calculate the minimum bit size required to represent this value, by truncating - /// the leading zeros. - #[inline] - fn bits_used(self) -> usize { - Self::BYTE_SIZE * 8 - self.leading_zeros() as usize - } - - /// Feeds [`Self::bits_used`] into a mapping to get an aligned bit width. - fn closest_aligned_bit_width(self) -> usize { - get_closest_aligned_bit_width(self.bits_used()) - } - - fn from_u8(b: u8) -> Self; -} - -/// Helps generalise the decoder efforts to be specific to supported integers. -/// (Instead of decoding to u64/i64 for all then downcasting). 
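Before the NInt trait below, a quick worked example of the bits_used formula above (BYTE_SIZE * 8 minus the leading zeros), sketched here for i32 outside the trait:

// bits_used truncates leading zeros: e.g. 5 = 0b101 needs 3 bits.
fn bits_used_i32(v: i32) -> usize {
    32 - v.leading_zeros() as usize
}

fn main() {
    assert_eq!(bits_used_i32(1), 1);
    assert_eq!(bits_used_i32(5), 3);
    assert_eq!(bits_used_i32(255), 8);
    assert_eq!(bits_used_i32(-1), 32); // sign bit set, so full width
}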
-pub trait NInt: - VarintSerde + ShlAssign + fmt::Debug + fmt::Display + fmt::Binary + Send + Sync + 'static -{ - type Bytes: AsRef<[u8]> + AsMut<[u8]> + Default + Clone + Copy + fmt::Debug; - - #[inline] - fn empty_byte_array() -> Self::Bytes { - Self::Bytes::default() - } - - /// Should truncate any extra bits. - fn from_i64(u: i64) -> Self; - - fn from_be_bytes(b: Self::Bytes) -> Self; - - // TODO: use num_traits::ToBytes instead - fn to_be_bytes(self) -> Self::Bytes; - - fn add_i64(self, i: i64) -> Option; - - fn sub_i64(self, i: i64) -> Option; - - // TODO: use Into instead? - fn as_i64(self) -> i64; - - fn read_big_endian(reader: &mut impl Read, byte_size: usize) -> Result { - debug_assert!( - byte_size <= Self::BYTE_SIZE, - "byte_size cannot exceed max byte size of self" - ); - let mut buffer = Self::empty_byte_array(); - // Read into back part of buffer since is big endian. - // So if smaller than N::BYTE_SIZE bytes, most significant bytes will be 0. - reader - .read_exact(&mut buffer.as_mut()[Self::BYTE_SIZE - byte_size..]) - .context(IoSnafu)?; - Ok(Self::from_be_bytes(buffer)) - } -} - -impl VarintSerde for i16 { - const BYTE_SIZE: usize = 2; - - #[inline] - fn from_u8(b: u8) -> Self { - b as Self - } -} - -impl VarintSerde for i32 { - const BYTE_SIZE: usize = 4; - - #[inline] - fn from_u8(b: u8) -> Self { - b as Self - } -} - -impl VarintSerde for i64 { - const BYTE_SIZE: usize = 8; - - #[inline] - fn from_u8(b: u8) -> Self { - b as Self - } -} - -impl VarintSerde for i128 { - const BYTE_SIZE: usize = 16; - - #[inline] - fn from_u8(b: u8) -> Self { - b as Self - } -} - -// We only implement for i16, i32, i64 and u64. -// ORC supports only signed Short, Integer and Long types for its integer types, -// and i8 is encoded as bytes. u64 is used for other encodings such as Strings -// (to encode length, etc.). 
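The SignedEncoding implementation above delegates to zigzag helpers defined in util.rs, which this diff does not show. For reference, a sketch assuming the standard protobuf-style zigzag mapping that ORC uses, where small magnitudes become small unsigned values and so varint-encode compactly:

// Assumed shape of the mapping behind signed_zigzag_encode/decode (util.rs).
fn zigzag_encode(v: i64) -> u64 {
    ((v << 1) ^ (v >> 63)) as u64
}

fn zigzag_decode(u: u64) -> i64 {
    ((u >> 1) as i64) ^ -((u & 1) as i64)
}

fn main() {
    assert_eq!(zigzag_encode(0), 0);
    assert_eq!(zigzag_encode(-1), 1);
    assert_eq!(zigzag_encode(1), 2);
    for v in [0i64, -1, 1, i64::MIN, i64::MAX] {
        assert_eq!(zigzag_decode(zigzag_encode(v)), v);
    }
}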
- -impl NInt for i16 { - type Bytes = [u8; 2]; - - #[inline] - fn from_i64(i: i64) -> Self { - i as Self - } - - #[inline] - fn from_be_bytes(b: Self::Bytes) -> Self { - Self::from_be_bytes(b) - } - - #[inline] - fn to_be_bytes(self) -> Self::Bytes { - self.to_be_bytes() - } - - #[inline] - fn add_i64(self, i: i64) -> Option { - i.try_into().ok().and_then(|i| self.checked_add(i)) - } - - #[inline] - fn sub_i64(self, i: i64) -> Option { - i.try_into().ok().and_then(|i| self.checked_sub(i)) - } - - #[inline] - fn as_i64(self) -> i64 { - self as i64 - } -} - -impl NInt for i32 { - type Bytes = [u8; 4]; - - #[inline] - fn from_i64(i: i64) -> Self { - i as Self - } - - #[inline] - fn from_be_bytes(b: Self::Bytes) -> Self { - Self::from_be_bytes(b) - } - - #[inline] - fn to_be_bytes(self) -> Self::Bytes { - self.to_be_bytes() - } - - #[inline] - fn add_i64(self, i: i64) -> Option { - i.try_into().ok().and_then(|i| self.checked_add(i)) - } - - #[inline] - fn sub_i64(self, i: i64) -> Option { - i.try_into().ok().and_then(|i| self.checked_sub(i)) - } - - #[inline] - fn as_i64(self) -> i64 { - self as i64 - } -} - -impl NInt for i64 { - type Bytes = [u8; 8]; - - #[inline] - fn from_i64(i: i64) -> Self { - i as Self - } - - #[inline] - fn from_be_bytes(b: Self::Bytes) -> Self { - Self::from_be_bytes(b) - } - - #[inline] - fn to_be_bytes(self) -> Self::Bytes { - self.to_be_bytes() - } - - #[inline] - fn add_i64(self, i: i64) -> Option { - self.checked_add(i) - } - - #[inline] - fn sub_i64(self, i: i64) -> Option { - self.checked_sub(i) - } - - #[inline] - fn as_i64(self) -> i64 { - self - } -} diff --git a/src/encoding/integer/rle_v1.rs b/src/encoding/integer/rle_v1.rs deleted file mode 100644 index 02c04953..00000000 --- a/src/encoding/integer/rle_v1.rs +++ /dev/null @@ -1,186 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Handling decoding of Integer Run Length Encoded V1 data in ORC files - -use std::{io::Read, marker::PhantomData}; - -use snafu::OptionExt; - -use crate::{ - encoding::{ - rle::GenericRle, - util::{read_u8, try_read_u8}, - }, - error::{OutOfSpecSnafu, Result}, -}; - -use super::{util::read_varint_zigzagged, EncodingSign, NInt}; - -const MAX_RUN_LENGTH: usize = 130; - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -enum EncodingType { - Run { length: usize, delta: i8 }, - Literals { length: usize }, -} - -impl EncodingType { - /// Decode header byte to determine sub-encoding. - /// Runs start with a positive byte, and literals with a negative byte. 
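
// Editor's note: a worked illustration of the header rule described above and
// implemented in from_header just below; this helper is hypothetical. A
// positive header byte announces a run of header + 3 values (base varint plus
// a delta byte), a negative one announces -header literal varints.
fn rle_v1_header(byte: u8) -> (&'static str, usize) {
    match byte as i8 {
        h if h < 0 => ("literals", h.unsigned_abs() as usize),
        h => ("run", h as usize + 3),
    }
}
// rle_v1_header(0x61) == ("run", 100) and rle_v1_header(0xfb) == ("literals", 5),
// matching the run and literal test vectors in the tests below.
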
- fn from_header(reader: &mut R) -> Result> { - let opt_encoding = match try_read_u8(reader)?.map(|b| b as i8) { - Some(header) if header < 0 => { - let length = header.unsigned_abs() as usize; - Some(Self::Literals { length }) - } - Some(header) => { - let length = header as u8 as usize + 3; - let delta = read_u8(reader)? as i8; - Some(Self::Run { length, delta }) - } - None => None, - }; - Ok(opt_encoding) - } -} - -/// Decodes a stream of Integer Run Length Encoded version 1 bytes. -pub struct RleV1Decoder { - reader: R, - decoded_ints: Vec, - current_head: usize, - sign: PhantomData, -} - -impl RleV1Decoder { - pub fn new(reader: R) -> Self { - Self { - reader, - decoded_ints: Vec::with_capacity(MAX_RUN_LENGTH), - current_head: 0, - sign: Default::default(), - } - } -} - -fn read_literals( - reader: &mut R, - out_ints: &mut Vec, - length: usize, -) -> Result<()> { - for _ in 0..length { - let lit = read_varint_zigzagged::<_, _, S>(reader)?; - out_ints.push(lit); - } - Ok(()) -} - -fn read_run( - reader: &mut R, - out_ints: &mut Vec, - length: usize, - delta: i8, -) -> Result<()> { - let mut base = read_varint_zigzagged::<_, _, S>(reader)?; - // Account for base value - let length = length - 1; - out_ints.push(base); - if delta < 0 { - let delta = delta.unsigned_abs(); - let delta = N::from_u8(delta); - for _ in 0..length { - base = base.checked_sub(&delta).context(OutOfSpecSnafu { - msg: "over/underflow when decoding patched base integer", - })?; - out_ints.push(base); - } - } else { - let delta = delta as u8; - let delta = N::from_u8(delta); - for _ in 0..length { - base = base.checked_add(&delta).context(OutOfSpecSnafu { - msg: "over/underflow when decoding patched base integer", - })?; - out_ints.push(base); - } - } - Ok(()) -} - -impl GenericRle for RleV1Decoder { - fn advance(&mut self, n: usize) { - self.current_head += n; - } - - fn available(&self) -> &[N] { - &self.decoded_ints[self.current_head..] - } - - fn decode_batch(&mut self) -> Result<()> { - self.current_head = 0; - self.decoded_ints.clear(); - - match EncodingType::from_header(&mut self.reader)? { - Some(EncodingType::Literals { length }) => { - read_literals::<_, _, S>(&mut self.reader, &mut self.decoded_ints, length) - } - Some(EncodingType::Run { length, delta }) => { - read_run::<_, _, S>(&mut self.reader, &mut self.decoded_ints, length, delta) - } - None => Ok(()), - } - } -} - -#[cfg(test)] -mod tests { - use std::io::Cursor; - - use crate::encoding::{integer::UnsignedEncoding, PrimitiveValueDecoder}; - - use super::*; - - fn test_helper(data: &[u8], expected: &[i64]) { - let mut reader = RleV1Decoder::::new(Cursor::new(data)); - let mut actual = vec![0; expected.len()]; - reader.decode(&mut actual).unwrap(); - assert_eq!(actual, expected); - } - - #[test] - fn test_run() -> Result<()> { - let data = [0x61, 0x00, 0x07]; - let expected = [7; 100]; - test_helper(&data, &expected); - - let data = [0x61, 0xff, 0x64]; - let expected = (1..=100).rev().collect::>(); - test_helper(&data, &expected); - - Ok(()) - } - - #[test] - fn test_literal() -> Result<()> { - let data = [0xfb, 0x02, 0x03, 0x06, 0x07, 0xb]; - let expected = vec![2, 3, 6, 7, 11]; - test_helper(&data, &expected); - - Ok(()) - } -} diff --git a/src/encoding/integer/rle_v2/delta.rs b/src/encoding/integer/rle_v2/delta.rs deleted file mode 100644 index 63e81b7f..00000000 --- a/src/encoding/integer/rle_v2/delta.rs +++ /dev/null @@ -1,289 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::io::Read; - -use bytes::{BufMut, BytesMut}; -use snafu::OptionExt; - -use crate::{ - encoding::{ - integer::{ - rle_v2::{EncodingType, MAX_RUN_LENGTH}, - util::{ - extract_run_length_from_header, read_ints, read_varint_zigzagged, - rle_v2_decode_bit_width, rle_v2_encode_bit_width, write_aligned_packed_ints, - write_varint_zigzagged, - }, - EncodingSign, SignedEncoding, VarintSerde, - }, - util::read_u8, - }, - error::{OrcError, OutOfSpecSnafu, Result}, -}; - -use super::NInt; - -/// We use i64 and u64 for delta to make things easier and to avoid edge cases, -/// as for example for i16, the delta may be too large to represent in an i16. -// TODO: expand on the above -pub fn read_delta_values( - reader: &mut R, - out_ints: &mut Vec, - deltas: &mut Vec, - header: u8, -) -> Result<()> { - // Encoding format: - // 2 bytes header - // - 2 bits for encoding type (constant 3) - // - 5 bits for encoded delta bitwidth (0 to 64) - // - 9 bits for run length (1 to 512) - // Base value (signed or unsigned) varint - // Delta value signed varint - // Sequence of delta values - - let encoded_delta_bit_width = (header >> 1) & 0x1f; - // Uses same encoding table as for direct & patched base, - // but special case where 0 indicates 0 width (for fixed delta) - let delta_bit_width = if encoded_delta_bit_width == 0 { - encoded_delta_bit_width as usize - } else { - rle_v2_decode_bit_width(encoded_delta_bit_width) - }; - - let second_byte = read_u8(reader)?; - let length = extract_run_length_from_header(header, second_byte); - - let base_value = read_varint_zigzagged::(reader)?; - out_ints.push(base_value); - - // Always signed since can be decreasing sequence - let delta_base = read_varint_zigzagged::(reader)?; - // TODO: does this get inlined? - let op: fn(N, i64) -> Option = if delta_base.is_positive() { - |acc, delta| acc.add_i64(delta) - } else { - |acc, delta| acc.sub_i64(delta) - }; - let delta_base = delta_base.abs(); // TODO: i64::MIN? 
- - if delta_bit_width == 0 { - // If width is 0 then all values have fixed delta of delta_base - // Skip first value since that's base_value - (1..length).try_fold(base_value, |acc, _| { - let acc = op(acc, delta_base).context(OutOfSpecSnafu { - msg: "over/underflow when decoding delta integer", - })?; - out_ints.push(acc); - Ok::<_, OrcError>(acc) - })?; - } else { - deltas.clear(); - // Add delta base and first value - let second_value = op(base_value, delta_base).context(OutOfSpecSnafu { - msg: "over/underflow when decoding delta integer", - })?; - out_ints.push(second_value); - // Run length includes base value and first delta, so skip them - let length = length - 2; - - // Unpack the delta values - read_ints(deltas, length, delta_bit_width, reader)?; - let mut acc = second_value; - // Each element is the delta, so find actual value using running accumulator - for delta in deltas { - acc = op(acc, *delta).context(OutOfSpecSnafu { - msg: "over/underflow when decoding delta integer", - })?; - out_ints.push(acc); - } - } - Ok(()) -} - -pub fn write_varying_delta( - writer: &mut BytesMut, - base_value: N, - first_delta: i64, - max_delta: i64, - subsequent_deltas: &[i64], -) { - debug_assert!( - max_delta > 0, - "varying deltas must have at least one non-zero delta" - ); - let bit_width = max_delta.closest_aligned_bit_width(); - // We can't have bit width of 1 for delta as that would get decoded as - // 0 bit width on reader, which indicates fixed delta, so bump 1 to 2 - // in this case. - let bit_width = if bit_width == 1 { 2 } else { bit_width }; - // Add 2 to len for the base_value and first_delta - let header = derive_delta_header(bit_width, subsequent_deltas.len() + 2); - writer.put_slice(&header); - - write_varint_zigzagged::<_, S>(writer, base_value); - // First delta always signed to indicate increasing/decreasing sequence - write_varint_zigzagged::<_, SignedEncoding>(writer, first_delta); - - // Bitpacked deltas - write_aligned_packed_ints(writer, bit_width, subsequent_deltas); -} - -pub fn write_fixed_delta( - writer: &mut BytesMut, - base_value: N, - fixed_delta: i64, - subsequent_deltas_len: usize, -) { - // Assuming len excludes base_value and first delta - let header = derive_delta_header(0, subsequent_deltas_len + 2); - writer.put_slice(&header); - - write_varint_zigzagged::<_, S>(writer, base_value); - // First delta always signed to indicate increasing/decreasing sequence - write_varint_zigzagged::<_, SignedEncoding>(writer, fixed_delta); -} - -fn derive_delta_header(delta_width: usize, run_length: usize) -> [u8; 2] { - debug_assert!( - (1..=MAX_RUN_LENGTH).contains(&run_length), - "delta run length cannot exceed 512 values" - ); - // [1, 512] to [0, 511] - let run_length = run_length as u16 - 1; - // 0 is special value to indicate fixed delta - let delta_width = if delta_width == 0 { - 0 - } else { - rle_v2_encode_bit_width(delta_width) - }; - // No need to mask as we guarantee max length is 512 - let encoded_length_high_bit = (run_length >> 8) as u8; - let encoded_length_low_bits = (run_length & 0xFF) as u8; - - let header1 = EncodingType::Delta.to_header() | delta_width << 1 | encoded_length_high_bit; - let header2 = encoded_length_low_bits; - - [header1, header2] -} - -#[cfg(test)] -mod tests { - use std::io::Cursor; - - use crate::encoding::integer::UnsignedEncoding; - - use super::*; - - // TODO: figure out how to write proptests for these - - #[test] - fn test_fixed_delta_positive() { - let mut buf = BytesMut::new(); - let mut out = vec![]; - let mut deltas = 
vec![]; - write_fixed_delta::(&mut buf, 0, 10, 100 - 2); - let header = buf[0]; - read_delta_values::( - &mut Cursor::new(&buf[1..]), - &mut out, - &mut deltas, - header, - ) - .unwrap(); - - let expected = (0..100).map(|i| i * 10).collect::>(); - assert_eq!(expected, out); - } - - #[test] - fn test_fixed_delta_negative() { - let mut buf = BytesMut::new(); - let mut out = vec![]; - let mut deltas = vec![]; - write_fixed_delta::(&mut buf, 10_000, -63, 150 - 2); - let header = buf[0]; - read_delta_values::( - &mut Cursor::new(&buf[1..]), - &mut out, - &mut deltas, - header, - ) - .unwrap(); - - let expected = (0..150).map(|i| 10_000 - i * 63).collect::>(); - assert_eq!(expected, out); - } - - #[test] - fn test_varying_delta_positive() { - let deltas = [ - 1, 6, 98, 12, 65, 9, 0, 0, 1, 128, 643, 129, 469, 123, 4572, 124, - ]; - let max = *deltas.iter().max().unwrap(); - - let mut buf = BytesMut::new(); - let mut out = vec![]; - let mut deltas = vec![]; - write_varying_delta::(&mut buf, 0, 10, max, &deltas); - let header = buf[0]; - read_delta_values::( - &mut Cursor::new(&buf[1..]), - &mut out, - &mut deltas, - header, - ) - .unwrap(); - - let mut expected = vec![0, 10]; - let mut i = 1; - for d in deltas { - expected.push(d + expected[i]); - i += 1; - } - assert_eq!(expected, out); - } - - #[test] - fn test_varying_delta_negative() { - let deltas = [ - 1, 6, 98, 12, 65, 9, 0, 0, 1, 128, 643, 129, 469, 123, 4572, 124, - ]; - let max = *deltas.iter().max().unwrap(); - - let mut buf = BytesMut::new(); - let mut out = vec![]; - let mut deltas = vec![]; - write_varying_delta::(&mut buf, 10_000, -1, max, &deltas); - let header = buf[0]; - read_delta_values::( - &mut Cursor::new(&buf[1..]), - &mut out, - &mut deltas, - header, - ) - .unwrap(); - - let mut expected = vec![10_000, 9_999]; - let mut i = 1; - for d in deltas { - expected.push(expected[i] - d); - i += 1; - } - assert_eq!(expected, out); - } -} diff --git a/src/encoding/integer/rle_v2/direct.rs b/src/encoding/integer/rle_v2/direct.rs deleted file mode 100644 index f05ec859..00000000 --- a/src/encoding/integer/rle_v2/direct.rs +++ /dev/null @@ -1,158 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
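
// Editor's note: a worked example of the two-byte delta header assembled by
// derive_delta_header in delta.rs above. The helper is hypothetical; the
// layout (2 bits encoding tag 0b11, 5 bits encoded delta width, 9 bits run
// length stored as length - 1) comes from the code above.
fn delta_header(encoded_width: u8, run_length: u16) -> [u8; 2] {
    assert!(encoded_width < 32 && (1..=512).contains(&run_length));
    let length = run_length - 1; // [1, 512] stored as [0, 511]
    let header1 = 0b1100_0000 | (encoded_width << 1) | (length >> 8) as u8;
    let header2 = (length & 0xFF) as u8;
    [header1, header2]
}
// delta_header(0, 100) == [0xC0, 0x63]: a fixed-delta (width 0) run of 100
// values, followed in the stream by the base and delta varints.
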
- -use std::io::Read; - -use bytes::{BufMut, BytesMut}; - -use crate::{ - encoding::{ - integer::{ - rle_v2::{EncodingType, MAX_RUN_LENGTH}, - util::{ - extract_run_length_from_header, read_ints, rle_v2_decode_bit_width, - rle_v2_encode_bit_width, write_aligned_packed_ints, - }, - EncodingSign, - }, - util::read_u8, - }, - error::{OutOfSpecSnafu, Result}, -}; - -use super::NInt; - -pub fn read_direct_values( - reader: &mut R, - out_ints: &mut Vec, - header: u8, -) -> Result<()> { - let encoded_bit_width = (header >> 1) & 0x1F; - let bit_width = rle_v2_decode_bit_width(encoded_bit_width); - - if (N::BYTE_SIZE * 8) < bit_width { - return OutOfSpecSnafu { - msg: "byte width of direct encoding exceeds byte size of integer being decoded to", - } - .fail(); - } - - let second_byte = read_u8(reader)?; - let length = extract_run_length_from_header(header, second_byte); - - // Write the unpacked values and zigzag decode to result buffer - read_ints(out_ints, length, bit_width, reader)?; - - for lit in out_ints.iter_mut() { - *lit = S::zigzag_decode(*lit); - } - - Ok(()) -} - -/// `values` and `max` must be zigzag encoded. If `max` is not provided, it is derived -/// by iterating over `values`. -pub fn write_direct(writer: &mut BytesMut, values: &[N], max: Option) { - debug_assert!( - (1..=MAX_RUN_LENGTH).contains(&values.len()), - "direct run length cannot exceed 512 values" - ); - - let max = max.unwrap_or_else(|| { - // Assert guards that values is non-empty - *values.iter().max_by_key(|x| x.bits_used()).unwrap() - }); - - let bit_width = max.closest_aligned_bit_width(); - let encoded_bit_width = rle_v2_encode_bit_width(bit_width); - // From [1, 512] to [0, 511] - let encoded_length = values.len() as u16 - 1; - // No need to mask as we guarantee max length is 512 - let encoded_length_high_bit = (encoded_length >> 8) as u8; - let encoded_length_low_bits = (encoded_length & 0xFF) as u8; - - let header1 = - EncodingType::Direct.to_header() | (encoded_bit_width << 1) | encoded_length_high_bit; - let header2 = encoded_length_low_bits; - - writer.put_u8(header1); - writer.put_u8(header2); - write_aligned_packed_ints(writer, bit_width, values); -} - -#[cfg(test)] -mod tests { - use std::io::Cursor; - - use proptest::prelude::*; - - use crate::encoding::integer::{SignedEncoding, UnsignedEncoding}; - - use super::*; - - fn roundtrip_direct_helper(values: &[N]) -> Result> { - let mut buf = BytesMut::new(); - let mut out = vec![]; - - write_direct(&mut buf, values, None); - let header = buf[0]; - read_direct_values::<_, _, S>(&mut Cursor::new(&buf[1..]), &mut out, header)?; - - Ok(out) - } - - #[test] - fn test_direct_edge_case() { - let values: Vec = vec![109, -17809, -29946, -17285]; - let encoded = values - .iter() - .map(|&v| SignedEncoding::zigzag_encode(v)) - .collect::>(); - let out = roundtrip_direct_helper::<_, SignedEncoding>(&encoded).unwrap(); - assert_eq!(out, values); - } - - proptest! 
{ - #[test] - fn roundtrip_direct_i16(values in prop::collection::vec(any::(), 1..=512)) { - let encoded = values.iter().map(|v| SignedEncoding::zigzag_encode(*v)).collect::>(); - let out = roundtrip_direct_helper::<_, SignedEncoding>(&encoded)?; - prop_assert_eq!(out, values); - } - - #[test] - fn roundtrip_direct_i32(values in prop::collection::vec(any::(), 1..=512)) { - let encoded = values.iter().map(|v| SignedEncoding::zigzag_encode(*v)).collect::>(); - let out = roundtrip_direct_helper::<_, SignedEncoding>(&encoded)?; - prop_assert_eq!(out, values); - } - - #[test] - fn roundtrip_direct_i64(values in prop::collection::vec(any::(), 1..=512)) { - let encoded = values.iter().map(|v| SignedEncoding::zigzag_encode(*v)).collect::>(); - let out = roundtrip_direct_helper::<_, SignedEncoding>(&encoded)?; - prop_assert_eq!(out, values); - } - - #[test] - fn roundtrip_direct_i64_unsigned(values in prop::collection::vec(0..=i64::MAX, 1..=512)) { - let encoded = values.iter().map(|v| UnsignedEncoding::zigzag_encode(*v)).collect::>(); - let out = roundtrip_direct_helper::<_, UnsignedEncoding>(&encoded)?; - prop_assert_eq!(out, values); - } - } -} diff --git a/src/encoding/integer/rle_v2/mod.rs b/src/encoding/integer/rle_v2/mod.rs deleted file mode 100644 index ed871cf8..00000000 --- a/src/encoding/integer/rle_v2/mod.rs +++ /dev/null @@ -1,677 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::{io::Read, marker::PhantomData}; - -use bytes::BytesMut; - -use crate::{ - encoding::{rle::GenericRle, util::try_read_u8, PrimitiveValueEncoder}, - error::Result, - memory::EstimateMemory, -}; - -use self::{ - delta::{read_delta_values, write_fixed_delta, write_varying_delta}, - direct::{read_direct_values, write_direct}, - patched_base::{read_patched_base, write_patched_base}, - short_repeat::{read_short_repeat_values, write_short_repeat}, -}; - -use super::{util::calculate_percentile_bits, EncodingSign, NInt, VarintSerde}; - -mod delta; -mod direct; -mod patched_base; -mod short_repeat; - -const MAX_RUN_LENGTH: usize = 512; -/// Minimum number of repeated values required to use Short Repeat sub-encoding -const SHORT_REPEAT_MIN_LENGTH: usize = 3; -const SHORT_REPEAT_MAX_LENGTH: usize = 10; -const BASE_VALUE_LIMIT: i64 = 1 << 56; - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -// TODO: put header data in here, e.g. base value, len, etc. -enum EncodingType { - ShortRepeat, - Direct, - PatchedBase, - Delta, -} - -impl EncodingType { - /// Checking highest two bits for encoding type. 
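
// Editor's note: the four RLEv2 sub-encoding tags carried in those top two
// bits, matched by from_header just below; a hypothetical helper for
// illustration, cross-checked against test payloads later in this module
// (0x0a => short repeat, 0x5e => direct, 0x8e => patched base, 0xc6 => delta).
fn encoding_tag(header: u8) -> &'static str {
    match header >> 6 {
        0b00 => "short repeat",
        0b01 => "direct",
        0b10 => "patched base",
        _ => "delta",
    }
}
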
- #[inline] - fn from_header(header: u8) -> Self { - match header & 0b_1100_0000 { - 0b_1100_0000 => Self::Delta, - 0b_1000_0000 => Self::PatchedBase, - 0b_0100_0000 => Self::Direct, - 0b_0000_0000 => Self::ShortRepeat, - _ => unreachable!(), - } - } - - /// Return byte with highest two bits set according to variant. - #[inline] - fn to_header(self) -> u8 { - match self { - EncodingType::Delta => 0b_1100_0000, - EncodingType::PatchedBase => 0b_1000_0000, - EncodingType::Direct => 0b_0100_0000, - EncodingType::ShortRepeat => 0b_0000_0000, - } - } -} - -pub struct RleV2Decoder { - reader: R, - decoded_ints: Vec, - /// Indexes into decoded_ints to make it act like a queue - current_head: usize, - deltas: Vec, - sign: PhantomData, -} - -impl RleV2Decoder { - pub fn new(reader: R) -> Self { - Self { - reader, - decoded_ints: Vec::with_capacity(MAX_RUN_LENGTH), - current_head: 0, - deltas: Vec::with_capacity(MAX_RUN_LENGTH), - sign: Default::default(), - } - } -} - -impl GenericRle for RleV2Decoder { - fn advance(&mut self, n: usize) { - self.current_head += n; - } - - fn available(&self) -> &[N] { - &self.decoded_ints[self.current_head..] - } - - fn decode_batch(&mut self) -> Result<()> { - self.current_head = 0; - self.decoded_ints.clear(); - let header = match try_read_u8(&mut self.reader)? { - Some(byte) => byte, - None => return Ok(()), - }; - - match EncodingType::from_header(header) { - EncodingType::ShortRepeat => read_short_repeat_values::<_, _, S>( - &mut self.reader, - &mut self.decoded_ints, - header, - )?, - EncodingType::Direct => { - read_direct_values::<_, _, S>(&mut self.reader, &mut self.decoded_ints, header)? - } - EncodingType::PatchedBase => { - read_patched_base::<_, _, S>(&mut self.reader, &mut self.decoded_ints, header)? - } - EncodingType::Delta => read_delta_values::<_, _, S>( - &mut self.reader, - &mut self.decoded_ints, - &mut self.deltas, - header, - )?, - } - - Ok(()) - } -} - -struct DeltaEncodingCheckResult { - base_value: N, - min: N, - max: N, - first_delta: i64, - max_delta: i64, - is_monotonic: bool, - is_fixed_delta: bool, - adjacent_deltas: Vec, -} - -/// Calculate the necessary values to determine if sequence can be delta encoded. -fn delta_encoding_check(literals: &[N]) -> DeltaEncodingCheckResult { - let base_value = literals[0]; - let mut min = base_value.min(literals[1]); - let mut max = base_value.max(literals[1]); - // Saturating should be fine here (and below) as we later check the - // difference between min & max and defer to direct encoding if it - // is too large (so the corrupt delta here won't actually be used). - // TODO: is there a more explicit way of ensuring this behaviour? 
- let first_delta = literals[1].as_i64().saturating_sub(base_value.as_i64()); - let mut current_delta; - let mut max_delta = 0; - - let mut is_increasing = first_delta.is_positive(); - let mut is_decreasing = first_delta.is_negative(); - let mut is_fixed_delta = true; - - let mut adjacent_deltas = vec![]; - - // We've already preprocessed the first step above - for i in 2..literals.len() { - let l1 = literals[i]; - let l0 = literals[i - 1]; - - min = min.min(l1); - max = max.max(l1); - - current_delta = l1.as_i64().saturating_sub(l0.as_i64()); - - is_increasing &= current_delta >= 0; - is_decreasing &= current_delta <= 0; - - is_fixed_delta &= current_delta == first_delta; - let current_delta = current_delta.saturating_abs(); - adjacent_deltas.push(current_delta); - max_delta = max_delta.max(current_delta); - } - let is_monotonic = is_increasing || is_decreasing; - - DeltaEncodingCheckResult { - base_value, - min, - max, - first_delta, - max_delta, - is_monotonic, - is_fixed_delta, - adjacent_deltas, - } -} - -/// Runs are guaranteed to have length > 1. -#[derive(Debug, Clone, Eq, PartialEq)] -enum RleV2EncodingState { - /// When buffer is empty and no values to encode. - Empty, - /// Special state for first value as we determine after the first - /// value whether to go fixed or variable run. - One(N), - /// Run of identical value of specified count. - FixedRun { value: N, count: usize }, - /// Run of variable values. - VariableRun { literals: Vec }, -} - -impl Default for RleV2EncodingState { - fn default() -> Self { - Self::Empty - } -} - -pub struct RleV2Encoder { - /// Stores the run length encoded sequences. - data: BytesMut, - /// Used in state machine for determining which sub-encoding - /// for a sequence to use. - state: RleV2EncodingState, - phantom: PhantomData, -} - -impl RleV2Encoder { - // Algorithm adapted from: - // https://github.com/apache/orc/blob/main/java/core/src/java/org/apache/orc/impl/RunLengthIntegerWriterV2.java - - /// Process each value to build up knowledge to determine which encoding to use. We attempt - /// to identify runs of identical values (fixed runs), otherwise falling back to variable - /// runs (varying values). - /// - /// When in a fixed run state, as long as identical values are found, we keep incrementing - /// the run length up to a maximum of 512, flushing to fixed delta run if so. If we encounter - /// a differing value, we flush to short repeat or fixed delta depending on the length and - /// reset the state (if the current run is small enough, we switch direct to variable run). - /// - /// When in a variable run state, if we find 3 identical values in a row as the latest values, - /// we flush the variable run to a sub-encoding then switch to fixed run, otherwise continue - /// incrementing the run length up to a max length of 512, before flushing and resetting the - /// state. For a variable run, extra logic must take place to determine which sub-encoding to - /// use when flushing, see [`Self::determine_variable_run_encoding`] for more details. 
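
// Editor's note: a condensed view of where a broken fixed run is flushed by
// process_value below; the thresholds mirror SHORT_REPEAT_MIN_LENGTH (3) and
// SHORT_REPEAT_MAX_LENGTH (10) defined earlier. Illustrative only.
fn fixed_run_flush_target(count: usize) -> &'static str {
    match count {
        2 => "fold into a variable run (too short for short repeat)",
        3..=10 => "short repeat",
        _ => "fixed delta with delta 0",
    }
}
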
- fn process_value(&mut self, value: N) { - match &mut self.state { - // When we start, or when a run was flushed to a sub-encoding - RleV2EncodingState::Empty => { - self.state = RleV2EncodingState::One(value); - } - // Here we determine if we look like we're in a fixed run or variable run - RleV2EncodingState::One(one_value) => { - if value == *one_value { - self.state = RleV2EncodingState::FixedRun { value, count: 2 }; - } else { - // TODO: alloc here - let mut literals = Vec::with_capacity(MAX_RUN_LENGTH); - literals.push(*one_value); - literals.push(value); - self.state = RleV2EncodingState::VariableRun { literals }; - } - } - // When we're in a run of identical values - RleV2EncodingState::FixedRun { - value: fixed_value, - count, - } => { - if value == *fixed_value { - // Continue fixed run, flushing to delta when max length reached - *count += 1; - if *count == MAX_RUN_LENGTH { - write_fixed_delta::<_, S>(&mut self.data, value, 0, *count - 2); - self.state = RleV2EncodingState::Empty; - } - } else { - // If fixed run is broken by a different value. - match count { - // Note that count cannot be 0 or 1 here as that is encoded - // by Empty and One states in self.state - 2 => { - // If fixed run is smaller than short repeat then just include - // it at the start of the variable run we're switching to. - // TODO: alloc here - let mut literals = Vec::with_capacity(MAX_RUN_LENGTH); - literals.push(*fixed_value); - literals.push(*fixed_value); - literals.push(value); - self.state = RleV2EncodingState::VariableRun { literals }; - } - SHORT_REPEAT_MIN_LENGTH..=SHORT_REPEAT_MAX_LENGTH => { - // If we have enough values for a Short Repeat, then encode as - // such. - write_short_repeat::<_, S>(&mut self.data, *fixed_value, *count); - self.state = RleV2EncodingState::One(value); - } - _ => { - // Otherwise if too large, use Delta encoding. - write_fixed_delta::<_, S>(&mut self.data, *fixed_value, 0, *count - 2); - self.state = RleV2EncodingState::One(value); - } - } - } - } - // When we're in a run of varying values - RleV2EncodingState::VariableRun { literals } => { - let length = literals.len(); - let last_value = literals[length - 1]; - let second_last_value = literals[length - 2]; - if value == last_value && value == second_last_value { - // Last 3 values (including current new one) are identical. Break the current - // variable run, flushing it to a sub-encoding, then switch to a fixed run - // state. - - // Pop off the last two values (which are identical to value) and flush - // the variable run to writer - literals.truncate(literals.len() - 2); - determine_variable_run_encoding::<_, S>(&mut self.data, literals); - - self.state = RleV2EncodingState::FixedRun { value, count: 3 }; - } else { - // Continue variable run, flushing sub-encoding if max length reached - literals.push(value); - if literals.len() == MAX_RUN_LENGTH { - determine_variable_run_encoding::<_, S>(&mut self.data, literals); - self.state = RleV2EncodingState::Empty; - } - } - } - } - } - - /// Flush any buffered values to the writer. 
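
// Editor's note: a usage sketch of the encoder/decoder pair, mirroring the
// roundtrip_helper in this module's tests further below. The turbofish
// parameter order is an assumption (the generic arguments were stripped from
// this diff), and the PrimitiveValueEncoder / PrimitiveValueDecoder traits
// must be in scope.
fn roundtrip_i64(values: &[i64]) -> Vec<i64> {
    let mut encoder = RleV2Encoder::<i64, SignedEncoding>::new();
    encoder.write_slice(values);
    let data = encoder.take_inner();

    let mut decoder = RleV2Decoder::<i64, _, SignedEncoding>::new(std::io::Cursor::new(data));
    let mut out = vec![0i64; values.len()];
    decoder.decode(&mut out).unwrap();
    out
}
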
- fn flush(&mut self) { - let state = std::mem::take(&mut self.state); - match state { - RleV2EncodingState::Empty => {} - RleV2EncodingState::One(value) => { - let value = S::zigzag_encode(value); - write_direct(&mut self.data, &[value], Some(value)); - } - RleV2EncodingState::FixedRun { value, count: 2 } => { - // Direct has smallest overhead - let value = S::zigzag_encode(value); - write_direct(&mut self.data, &[value, value], Some(value)); - } - RleV2EncodingState::FixedRun { value, count } if count <= SHORT_REPEAT_MAX_LENGTH => { - // Short repeat must have length [3, 10] - write_short_repeat::<_, S>(&mut self.data, value, count); - } - RleV2EncodingState::FixedRun { value, count } => { - write_fixed_delta::<_, S>(&mut self.data, value, 0, count - 2); - } - RleV2EncodingState::VariableRun { mut literals } => { - determine_variable_run_encoding::<_, S>(&mut self.data, &mut literals); - } - } - } -} - -impl EstimateMemory for RleV2Encoder { - fn estimate_memory_size(&self) -> usize { - self.data.len() - } -} - -impl PrimitiveValueEncoder for RleV2Encoder { - fn new() -> Self { - Self { - data: BytesMut::new(), - state: RleV2EncodingState::Empty, - phantom: Default::default(), - } - } - - fn write_one(&mut self, value: N) { - self.process_value(value); - } - - fn take_inner(&mut self) -> bytes::Bytes { - self.flush(); - std::mem::take(&mut self.data).into() - } -} - -fn determine_variable_run_encoding( - writer: &mut BytesMut, - literals: &mut [N], -) { - // Direct will have smallest overhead for tiny runs - if literals.len() <= SHORT_REPEAT_MIN_LENGTH { - for v in literals.iter_mut() { - *v = S::zigzag_encode(*v); - } - write_direct(writer, literals, None); - return; - } - - // Invariant: literals.len() > 3 - let DeltaEncodingCheckResult { - base_value, - min, - max, - first_delta, - max_delta, - is_monotonic, - is_fixed_delta, - adjacent_deltas, - } = delta_encoding_check(literals); - - // Quick check for delta overflow, if so just move to Direct as it has less - // overhead than Patched Base. - // TODO: should min/max be N or i64 here? - if max.checked_sub(&min).is_none() { - for v in literals.iter_mut() { - *v = S::zigzag_encode(*v); - } - write_direct(writer, literals, None); - return; - } - - // Any subtractions here on are safe due to above check - - if is_fixed_delta { - write_fixed_delta::<_, S>(writer, literals[0], first_delta, literals.len() - 2); - return; - } - - // First delta used to indicate if increasing or decreasing, so must be non-zero - if first_delta != 0 && is_monotonic { - write_varying_delta::<_, S>(writer, base_value, first_delta, max_delta, &adjacent_deltas); - return; - } - - // In Java implementation, Patched Base encoding base value cannot exceed 56 - // bits in value otherwise it can overflow the maximum 8 bytes used to encode - // the value when signed MSB encoding is used (adds an extra bit). - let min = min.as_i64(); - if min.abs() >= BASE_VALUE_LIMIT && min != i64::MIN { - for v in literals.iter_mut() { - *v = S::zigzag_encode(*v); - } - write_direct(writer, literals, None); - return; - } - - // TODO: another allocation here - let zigzag_literals = literals - .iter() - .map(|&v| S::zigzag_encode(v)) - .collect::>(); - let zigzagged_90_percentile_bit_width = calculate_percentile_bits(&zigzag_literals, 0.90); - // TODO: can derive from min/max? 
- let zigzagged_100_percentile_bit_width = calculate_percentile_bits(&zigzag_literals, 1.00); - // If variation of bit width between largest value and lower 90% of values isn't - // significant enough, just use direct encoding as patched base wouldn't be as - // efficient. - if (zigzagged_100_percentile_bit_width.saturating_sub(zigzagged_90_percentile_bit_width)) <= 1 { - // TODO: pass through the 100p here - write_direct(writer, &zigzag_literals, None); - return; - } - - // Base value for patched base is the minimum value - // Patch data values are the literals with the base value subtracted - // We use base_reduced_literals to store these base reduced literals - let mut max_data_value = 0; - let mut base_reduced_literals = vec![]; - for l in literals.iter() { - // All base reduced literals become positive here - let base_reduced_literal = l.as_i64() - min; - base_reduced_literals.push(base_reduced_literal); - max_data_value = max_data_value.max(base_reduced_literal); - } - - // Aka 100th percentile - let base_reduced_literals_max_bit_width = max_data_value.closest_aligned_bit_width(); - // 95th percentile width is used to find the 5% of values to encode with patches - let base_reduced_literals_95th_percentile_bit_width = - calculate_percentile_bits(&base_reduced_literals, 0.95); - - // Patch only if we have outliers, based on bit width - if base_reduced_literals_max_bit_width != base_reduced_literals_95th_percentile_bit_width { - write_patched_base( - writer, - &mut base_reduced_literals, - min, - base_reduced_literals_max_bit_width, - base_reduced_literals_95th_percentile_bit_width, - ); - } else { - // TODO: pass through the 100p here - write_direct(writer, &zigzag_literals, None); - } -} - -#[cfg(test)] -mod tests { - - use std::io::Cursor; - - use proptest::prelude::*; - - use crate::encoding::{ - integer::{SignedEncoding, UnsignedEncoding}, - PrimitiveValueDecoder, - }; - - use super::*; - - // TODO: have tests varying the out buffer, to ensure decode() is called - // multiple times - - fn test_helper(data: &[u8], expected: &[i64]) { - let mut reader = RleV2Decoder::::new(Cursor::new(data)); - let mut actual = vec![0; expected.len()]; - reader.decode(&mut actual).unwrap(); - assert_eq!(actual, expected); - } - - #[test] - fn reader_test() { - let data = [2, 1, 64, 5, 80, 1, 1]; - let expected = [1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1]; - test_helper::(&data, &expected); - - // direct - let data = [0x5e, 0x03, 0x5c, 0xa1, 0xab, 0x1e, 0xde, 0xad, 0xbe, 0xef]; - let expected = [23713, 43806, 57005, 48879]; - test_helper::(&data, &expected); - - // patched base - let data = [ - 102, 9, 0, 126, 224, 7, 208, 0, 126, 79, 66, 64, 0, 127, 128, 8, 2, 0, 128, 192, 8, 22, - 0, 130, 0, 8, 42, - ]; - let expected = [ - 2030, 2000, 2020, 1000000, 2040, 2050, 2060, 2070, 2080, 2090, - ]; - test_helper::(&data, &expected); - - let data = [196, 9, 2, 2, 74, 40, 166]; - let expected = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29]; - test_helper::(&data, &expected); - - let data = [0xc6, 0x09, 0x02, 0x02, 0x22, 0x42, 0x42, 0x46]; - let expected = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29]; - test_helper::(&data, &expected); - - let data = [7, 1]; - let expected = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]; - test_helper::(&data, &expected); - } - - #[test] - fn short_repeat() { - let data = [0x0a, 0x27, 0x10]; - let expected = [10000, 10000, 10000, 10000, 10000]; - test_helper::(&data, &expected); - } - - #[test] - fn direct() { - let data = [0x5e, 0x03, 0x5c, 0xa1, 0xab, 0x1e, 0xde, 0xad, 0xbe, 0xef]; - let expected = 
[23713, 43806, 57005, 48879]; - test_helper::(&data, &expected); - } - - #[test] - fn direct_signed() { - let data = [110, 3, 0, 185, 66, 1, 86, 60, 1, 189, 90, 1, 125, 222]; - let expected = [23713, 43806, 57005, 48879]; - test_helper::(&data, &expected); - } - - #[test] - fn delta() { - let data = [0xc6, 0x09, 0x02, 0x02, 0x22, 0x42, 0x42, 0x46]; - let expected = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29]; - test_helper::(&data, &expected); - } - - #[test] - fn patched_base() { - let data = [ - 0x8e, 0x09, 0x2b, 0x21, 0x07, 0xd0, 0x1e, 0x00, 0x14, 0x70, 0x28, 0x32, 0x3c, 0x46, - 0x50, 0x5a, 0xfc, 0xe8, - ]; - let expected = [ - 2030, 2000, 2020, 1000000, 2040, 2050, 2060, 2070, 2080, 2090, - ]; - test_helper::(&data, &expected); - } - - #[test] - fn patched_base_1() { - let data = vec![ - 144, 109, 4, 164, 141, 16, 131, 194, 0, 240, 112, 64, 60, 84, 24, 3, 193, 201, 128, - 120, 60, 33, 4, 244, 3, 193, 192, 224, 128, 56, 32, 15, 22, 131, 129, 225, 0, 112, 84, - 86, 14, 8, 106, 193, 192, 228, 160, 64, 32, 14, 213, 131, 193, 192, 240, 121, 124, 30, - 18, 9, 132, 67, 0, 224, 120, 60, 28, 14, 32, 132, 65, 192, 240, 160, 56, 61, 91, 7, 3, - 193, 192, 240, 120, 76, 29, 23, 7, 3, 220, 192, 240, 152, 60, 52, 15, 7, 131, 129, 225, - 0, 144, 56, 30, 14, 44, 140, 129, 194, 224, 120, 0, 28, 15, 8, 6, 129, 198, 144, 128, - 104, 36, 27, 11, 38, 131, 33, 48, 224, 152, 60, 111, 6, 183, 3, 112, 0, 1, 78, 5, 46, - 2, 1, 1, 141, 3, 1, 1, 138, 22, 0, 65, 1, 4, 0, 225, 16, 209, 192, 4, 16, 8, 36, 16, 3, - 48, 1, 3, 13, 33, 0, 176, 0, 1, 94, 18, 0, 68, 0, 33, 1, 143, 0, 1, 7, 93, 0, 25, 0, 5, - 0, 2, 0, 4, 0, 1, 0, 1, 0, 2, 0, 16, 0, 1, 11, 150, 0, 3, 0, 1, 0, 1, 99, 157, 0, 1, - 140, 54, 0, 162, 1, 130, 0, 16, 112, 67, 66, 0, 2, 4, 0, 0, 224, 0, 1, 0, 16, 64, 16, - 91, 198, 1, 2, 0, 32, 144, 64, 0, 12, 2, 8, 24, 0, 64, 0, 1, 0, 0, 8, 48, 51, 128, 0, - 2, 12, 16, 32, 32, 71, 128, 19, 76, - ]; - // expected data generated from Orc Java implementation - let expected = vec![ - 20, 2, 3, 2, 1, 3, 17, 71, 35, 2, 1, 139, 2, 2, 3, 1783, 475, 2, 1, 1, 3, 1, 3, 2, 32, - 1, 2, 3, 1, 8, 30, 1, 3, 414, 1, 1, 135, 3, 3, 1, 414, 2, 1, 2, 2, 594, 2, 5, 6, 4, 11, - 1, 2, 2, 1, 1, 52, 4, 1, 2, 7, 1, 17, 334, 1, 2, 1, 2, 2, 6, 1, 266, 1, 2, 217, 2, 6, - 2, 13, 2, 2, 1, 2, 3, 5, 1, 2, 1, 7244, 11813, 1, 33, 2, -13, 1, 2, 3, 13, 1, 92, 3, - 13, 5, 14, 9, 141, 12, 6, 15, 25, -1, -1, -1, 23, 1, -1, -1, -71, -2, -1, -1, -1, -1, - 2, 1, 4, 34, 5, 78, 8, 1, 2, 2, 1, 9, 10, 2, 1, 4, 13, 1, 5, 4, 4, 19, 5, -1, -1, -1, - 34, -17, -200, -1, -943, -13, -3, 1, 2, -1, -1, 1, 8, -1, 1483, -2, -1, -1, -12751, -1, - -1, -1, 66, 1, 3, 8, 131, 14, 5, 1, 2, 2, 1, 1, 8, 1, 1, 2, 1, 5, 9, 2, 3, 112, 13, 2, - 2, 1, 5, 10, 3, 1, 1, 13, 2, 3, 4, 1, 3, 1, 1, 2, 1, 1, 2, 4, 2, 207, 1, 1, 2, 4, 3, 3, - 2, 2, 16, - ]; - test_helper::(&data, &expected); - } - - // TODO: be smarter about prop test here, generate different patterns of ints - // - e.g. increasing/decreasing sequences, outliers, repeated - // - to ensure all different subencodings are being used (and might make shrinking better) - // currently 99% of the time here the subencoding will be Direct due to random generation - - fn roundtrip_helper(values: &[N]) -> Result> { - let mut writer = RleV2Encoder::::new(); - writer.write_slice(values); - let data = writer.take_inner(); - - let mut reader = RleV2Decoder::::new(Cursor::new(data)); - let mut actual = vec![N::zero(); values.len()]; - reader.decode(&mut actual).unwrap(); - - Ok(actual) - } - - proptest! 
{ - #[test] - fn roundtrip_i16(values in prop::collection::vec(any::(), 1..1_000)) { - let out = roundtrip_helper::<_, SignedEncoding>(&values)?; - prop_assert_eq!(out, values); - } - - #[test] - fn roundtrip_i32(values in prop::collection::vec(any::(), 1..1_000)) { - let out = roundtrip_helper::<_, SignedEncoding>(&values)?; - prop_assert_eq!(out, values); - } - - #[test] - fn roundtrip_i64(values in prop::collection::vec(any::(), 1..1_000)) { - let out = roundtrip_helper::<_, SignedEncoding>(&values)?; - prop_assert_eq!(out, values); - } - - #[test] - fn roundtrip_i64_unsigned(values in prop::collection::vec(0..=i64::MAX, 1..1_000)) { - let out = roundtrip_helper::<_, UnsignedEncoding>(&values)?; - prop_assert_eq!(out, values); - } - } -} diff --git a/src/encoding/integer/rle_v2/patched_base.rs b/src/encoding/integer/rle_v2/patched_base.rs deleted file mode 100644 index c08800c1..00000000 --- a/src/encoding/integer/rle_v2/patched_base.rs +++ /dev/null @@ -1,414 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::io::Read; - -use bytes::{BufMut, BytesMut}; -use snafu::OptionExt; - -use super::{EncodingType, NInt}; -use crate::{ - encoding::{ - integer::{ - util::{ - encode_bit_width, extract_run_length_from_header, get_closest_fixed_bits, - read_ints, rle_v2_decode_bit_width, signed_msb_encode, write_packed_ints, - }, - EncodingSign, VarintSerde, - }, - util::read_u8, - }, - error::{OutOfSpecSnafu, Result}, -}; - -pub fn read_patched_base( - reader: &mut R, - out_ints: &mut Vec, - header: u8, -) -> Result<()> { - let encoded_bit_width = (header >> 1) & 0x1F; - let value_bit_width = rle_v2_decode_bit_width(encoded_bit_width); - // Bit width derived from u8 above, so impossible to overflow u32 - let value_bit_width_u32 = u32::try_from(value_bit_width).unwrap(); - - let second_byte = read_u8(reader)?; - let length = extract_run_length_from_header(header, second_byte); - - let third_byte = read_u8(reader)?; - let fourth_byte = read_u8(reader)?; - - // Base width is one off - let base_byte_width = ((third_byte >> 5) & 0x07) as usize + 1; - - let patch_bit_width = rle_v2_decode_bit_width(third_byte & 0x1f); - - // Patch gap width is one off - let patch_gap_bit_width = ((fourth_byte >> 5) & 0x07) as usize + 1; - - let patch_total_bit_width = patch_bit_width + patch_gap_bit_width; - if patch_total_bit_width > 64 { - return OutOfSpecSnafu { - msg: "combined patch width and patch gap width cannot be greater than 64 bits", - } - .fail(); - } - - let patch_list_length = (fourth_byte & 0x1f) as usize; - - let base = N::read_big_endian(reader, base_byte_width)?; - let base = S::decode_signed_msb(base, base_byte_width); - - // Get data values - // TODO: this should read into Vec - // as base reduced values can exceed N::max() - // (e.g. 
if base is N::min() and this is signed type) - read_ints(out_ints, length, value_bit_width, reader)?; - - // Get patches that will be applied to base values. - // At most they might be u64 in width (because of check above). - let ceil_patch_total_bit_width = get_closest_fixed_bits(patch_total_bit_width); - let mut patches: Vec = Vec::with_capacity(patch_list_length); - read_ints( - &mut patches, - patch_list_length, - ceil_patch_total_bit_width, - reader, - )?; - - // TODO: document and explain below logic - let mut patch_index = 0; - let patch_mask = (1 << patch_bit_width) - 1; - let mut current_gap = patches[patch_index] >> patch_bit_width; - let mut current_patch = patches[patch_index] & patch_mask; - let mut actual_gap = 0; - - while current_gap == 255 && current_patch == 0 { - actual_gap += 255; - patch_index += 1; - current_gap = patches[patch_index] >> patch_bit_width; - current_patch = patches[patch_index] & patch_mask; - } - actual_gap += current_gap; - - for (idx, value) in out_ints.iter_mut().enumerate() { - if idx == actual_gap as usize { - let patch_bits = - current_patch - .checked_shl(value_bit_width_u32) - .context(OutOfSpecSnafu { - msg: "Overflow while shifting patch bits by value_bit_width", - })?; - // Safe conversion without loss as we check the bit width prior - let patch_bits = N::from_i64(patch_bits); - let patched_value = *value | patch_bits; - - *value = patched_value.checked_add(&base).context(OutOfSpecSnafu { - msg: "over/underflow when decoding patched base integer", - })?; - - patch_index += 1; - - if patch_index < patches.len() { - current_gap = patches[patch_index] >> patch_bit_width; - current_patch = patches[patch_index] & patch_mask; - actual_gap = 0; - - while current_gap == 255 && current_patch == 0 { - actual_gap += 255; - patch_index += 1; - current_gap = patches[patch_index] >> patch_bit_width; - current_patch = patches[patch_index] & patch_mask; - } - - actual_gap += current_gap; - actual_gap += idx as i64; - } - } else { - *value = value.checked_add(&base).context(OutOfSpecSnafu { - msg: "over/underflow when decoding patched base integer", - })?; - } - } - - Ok(()) -} - -fn derive_patches( - base_reduced_literals: &mut [i64], - patch_bits_width: usize, - max_base_value_bit_width: usize, -) -> (Vec, usize) { - // Values with bits exceeding this mask will be patched. - let max_base_value_mask = (1 << max_base_value_bit_width) - 1; - // Used to encode gaps greater than 255 (no patch bits, just used for gap). - let jump_patch = 255 << patch_bits_width; - - // At most 5% of values that must be patched. - // (Since max buffer length is 512, at most this can be 26) - let mut patches: Vec = Vec::with_capacity(26); - let mut last_patch_index = 0; - // Needed to determine bit width of patch gaps to encode in header. - let mut max_gap = 0; - for (idx, lit) in base_reduced_literals - .iter_mut() - .enumerate() - // Find all values which need to be patched (the 5% of values larger than the others) - .filter(|(_, &mut lit)| lit > max_base_value_mask) - { - // Convert to unsigned to ensure leftmost bits are 0 - let patch_bits = (*lit as u64) >> max_base_value_bit_width; - - // Gaps can at most be 255 (since gap bit width cannot exceed 8; in spec it states - // the header has only 3 bits to encode the size of the patch gap, so 8 is the largest - // value). - // - // Therefore if gap is found greater than 255 then we insert an empty patch with gap of 255 - // (and the empty patch will have no effect when reading as patching using empty bits will - // be a no-op). 
- // - // Extra special case if gap is 511, we unroll into inserting two empty patches (instead of - // relying on a loop). Max buffer size cannot exceed 512 so this is the largest possible gap. - let gap = idx - last_patch_index; - let gap = if gap == 511 { - max_gap = 255; - patches.push(jump_patch); - patches.push(jump_patch); - 1 - } else if gap > 255 { - max_gap = 255; - patches.push(jump_patch); - gap - 255 - } else { - max_gap = max_gap.max(gap); - gap - }; - let patch = patch_bits | (gap << patch_bits_width) as u64; - patches.push(patch as i64); - - last_patch_index = idx; - - // Stripping patch bits - *lit &= max_base_value_mask; - } - - // If only one element to be patched, and is the very first one. - // Patch gap width minimum is 1. - let patch_gap_width = if max_gap == 0 { - 1 - } else { - (max_gap as i16).bits_used() - }; - - (patches, patch_gap_width) -} - -pub fn write_patched_base( - writer: &mut BytesMut, - base_reduced_literals: &mut [i64], - base: i64, - brl_100p_bit_width: usize, - brl_95p_bit_width: usize, -) { - let patch_bits_width = brl_100p_bit_width - brl_95p_bit_width; - let patch_bits_width = get_closest_fixed_bits(patch_bits_width); - // According to spec, each patch (patch bits + gap) must be <= 64 bits. - // So we adjust accordingly here if we hit this edge case where patch_width - // is 64 bits (which would have no space for gap). - let (patch_bits_width, brl_95p_bit_width) = if patch_bits_width == 64 { - (56, 8) - } else { - (patch_bits_width, brl_95p_bit_width) - }; - - let (patches, patch_gap_width) = - derive_patches(base_reduced_literals, patch_bits_width, brl_95p_bit_width); - - let encoded_bit_width = encode_bit_width(brl_95p_bit_width) as u8; - - // [1, 512] to [0, 511] - let run_length = base_reduced_literals.len() as u16 - 1; - - // No need to mask as we guarantee max length is 512 - let encoded_length_high_bit = (run_length >> 8) as u8; - let encoded_length_low_bits = (run_length & 0xFF) as u8; - - // +1 to account for sign bit - let base_bit_width = get_closest_fixed_bits(base.abs().bits_used() + 1); - let base_byte_width = base_bit_width.div_ceil(8).max(1); - let msb_encoded_min = signed_msb_encode(base, base_byte_width); - // [1, 8] to [0, 7] - let encoded_base_width = base_byte_width - 1; - let encoded_patch_bits_width = encode_bit_width(patch_bits_width); - let encoded_patch_gap_width = patch_gap_width - 1; - - let header1 = - EncodingType::PatchedBase.to_header() | encoded_bit_width << 1 | encoded_length_high_bit; - let header2 = encoded_length_low_bits; - let header3 = (encoded_base_width as u8) << 5 | encoded_patch_bits_width as u8; - let header4 = (encoded_patch_gap_width as u8) << 5 | patches.len() as u8; - writer.put_slice(&[header1, header2, header3, header4]); - - // Write out base value as big endian bytes - let base_bytes = msb_encoded_min.to_be_bytes(); - // 8 since i64 - let base_bytes = &base_bytes.as_ref()[8 - base_byte_width..]; - writer.put_slice(base_bytes); - - // Writing base reduced literals followed by patch list - let bit_width = get_closest_fixed_bits(brl_95p_bit_width); - write_packed_ints(writer, bit_width, base_reduced_literals); - let bit_width = get_closest_fixed_bits(patch_gap_width + patch_bits_width); - write_packed_ints(writer, bit_width, &patches); -} - -#[cfg(test)] -mod tests { - use std::io::Cursor; - - use proptest::prelude::*; - - use crate::encoding::integer::{util::calculate_percentile_bits, SignedEncoding}; - - use super::*; - - #[derive(Debug)] - struct PatchesStrategy { - base: i64, - 
base_reduced_values: Vec, - patches: Vec, - patch_indices: Vec, - base_index: usize, - } - - fn patches_strategy() -> impl Strategy { - // TODO: clean this up a bit - prop::collection::vec(0..1_000_000_i64, 20..=512) - .prop_flat_map(|base_reduced_values| { - let base_strategy = -1_000_000_000..1_000_000_000_i64; - let max_patches_length = (base_reduced_values.len() as f32 * 0.05).ceil() as usize; - let base_reduced_values_strategy = Just(base_reduced_values); - let patches_strategy = prop::collection::vec( - 1_000_000_000_000_000..1_000_000_000_000_000_000_i64, - 1..=max_patches_length, - ); - ( - base_strategy, - base_reduced_values_strategy, - patches_strategy, - ) - }) - .prop_flat_map(|(base, base_reduced_values, patches)| { - let base_strategy = Just(base); - // +1 for the base index, so we don't have to deduplicate separately - let patch_indices_strategy = - prop::collection::hash_set(0..base_reduced_values.len(), patches.len() + 1); - let base_reduced_values_strategy = Just(base_reduced_values); - let patches_strategy = Just(patches); - ( - base_strategy, - base_reduced_values_strategy, - patches_strategy, - patch_indices_strategy, - ) - }) - .prop_map(|(base, base_reduced_values, patches, patch_indices)| { - let mut patch_indices = patch_indices.into_iter().collect::>(); - let base_index = patch_indices.pop().unwrap(); - PatchesStrategy { - base, - base_reduced_values, - patches, - patch_indices, - base_index, - } - }) - } - - fn roundtrip_patched_base_helper( - base_reduced_literals: &[i64], - base: i64, - brl_95p_bit_width: usize, - brl_100p_bit_width: usize, - ) -> Result> { - let mut base_reduced_literals = base_reduced_literals.to_vec(); - - let mut buf = BytesMut::new(); - let mut out = vec![]; - - write_patched_base( - &mut buf, - &mut base_reduced_literals, - base, - brl_100p_bit_width, - brl_95p_bit_width, - ); - let header = buf[0]; - read_patched_base::(&mut Cursor::new(&buf[1..]), &mut out, header)?; - - Ok(out) - } - - fn form_patched_base_values( - base_reduced_values: &[i64], - patches: &[i64], - patch_indices: &[usize], - base_index: usize, - ) -> Vec { - let mut base_reduced_values = base_reduced_values.to_vec(); - for (&patch, &index) in patches.iter().zip(patch_indices) { - base_reduced_values[index] = patch; - } - // Need at least one zero to represent the base - base_reduced_values[base_index] = 0; - base_reduced_values - } - - fn form_expected_values(base: i64, base_reduced_values: &[i64]) -> Vec { - base_reduced_values.iter().map(|&v| base + v).collect() - } - - proptest! 
{ - #[test] - fn roundtrip_patched_base_i64(patches_strategy in patches_strategy()) { - let PatchesStrategy { - base, - base_reduced_values, - patches, - patch_indices, - base_index - } = patches_strategy; - let base_reduced_values = form_patched_base_values( - &base_reduced_values, - &patches, - &patch_indices, - base_index - ); - let expected = form_expected_values(base, &base_reduced_values); - let brl_95p_bit_width = calculate_percentile_bits(&base_reduced_values, 0.95); - let brl_100p_bit_width = calculate_percentile_bits(&base_reduced_values, 1.0); - // Need enough outliers to require patching - prop_assume!(brl_95p_bit_width != brl_100p_bit_width); - let actual = roundtrip_patched_base_helper( - &base_reduced_values, - base, - brl_95p_bit_width, - brl_100p_bit_width - )?; - prop_assert_eq!(actual, expected); - } - } -} diff --git a/src/encoding/integer/rle_v2/short_repeat.rs b/src/encoding/integer/rle_v2/short_repeat.rs deleted file mode 100644 index 304d2cb4..00000000 --- a/src/encoding/integer/rle_v2/short_repeat.rs +++ /dev/null @@ -1,132 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
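
// Editor's note: a worked decode of the short-repeat header byte documented in
// read_short_repeat_values below, using the payload from the short_repeat test
// earlier in this diff ([0x0a, 0x27, 0x10]). Only the bit positions come from
// the code; the helper itself is illustrative.
fn short_repeat_header(header: u8) -> (usize, usize) {
    let byte_width = ((header >> 3) & 0x07) as usize + 1; // value width, 1..=8 bytes
    let run_length = (header & 0x07) as usize + 3; // repeat count, 3..=10
    (byte_width, run_length)
}
// short_repeat_header(0x0a) == (2, 5): the 2-byte value 0x2710 (10000) repeats
// 5 times, matching the expected output of that test.
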
- -use std::io::Read; - -use bytes::{BufMut, BytesMut}; - -use crate::{ - encoding::integer::{rle_v2::EncodingType, EncodingSign}, - error::{OutOfSpecSnafu, Result}, -}; - -use super::{NInt, SHORT_REPEAT_MIN_LENGTH}; - -pub fn read_short_repeat_values( - reader: &mut R, - out_ints: &mut Vec, - header: u8, -) -> Result<()> { - // Header byte: - // - // eeww_wccc - // 7 0 LSB - // - // ee = Sub-encoding bits, always 00 - // www = Value width bits - // ccc = Repeat count bits - - let byte_width = (header >> 3) & 0x07; // Encoded as 0 to 7 - let byte_width = byte_width as usize + 1; // Decode to 1 to 8 bytes - - if N::BYTE_SIZE < byte_width { - return OutOfSpecSnafu { - msg: - "byte width of short repeat encoding exceeds byte size of integer being decoded to", - } - .fail(); - } - - let run_length = (header & 0x07) as usize + SHORT_REPEAT_MIN_LENGTH; - - // Value that is being repeated is encoded as value_byte_width bytes in big endian format - let val = N::read_big_endian(reader, byte_width)?; - let val = S::zigzag_decode(val); - - out_ints.extend(std::iter::repeat(val).take(run_length)); - - Ok(()) -} - -pub fn write_short_repeat(writer: &mut BytesMut, value: N, count: usize) { - debug_assert!((SHORT_REPEAT_MIN_LENGTH..=10).contains(&count)); - - let value = S::zigzag_encode(value); - - // Take max in case value = 0 - let byte_size = value.bits_used().div_ceil(8).max(1) as u8; - let encoded_byte_size = byte_size - 1; - let encoded_count = (count - SHORT_REPEAT_MIN_LENGTH) as u8; - - let header = EncodingType::ShortRepeat.to_header() | (encoded_byte_size << 3) | encoded_count; - let bytes = value.to_be_bytes(); - let bytes = &bytes.as_ref()[N::BYTE_SIZE - byte_size as usize..]; - - writer.put_u8(header); - writer.put_slice(bytes); -} - -#[cfg(test)] -mod tests { - use std::io::Cursor; - - use proptest::prelude::*; - - use crate::encoding::integer::{SignedEncoding, UnsignedEncoding}; - - use super::*; - - fn roundtrip_short_repeat_helper( - value: N, - count: usize, - ) -> Result> { - let mut buf = BytesMut::new(); - let mut out = vec![]; - - write_short_repeat::<_, S>(&mut buf, value, count); - let header = buf[0]; - read_short_repeat_values::<_, _, S>(&mut Cursor::new(&buf[1..]), &mut out, header)?; - - Ok(out) - } - - proptest! { - #[test] - fn roundtrip_short_repeat_i16(value: i16, count in 3_usize..=10) { - let out = roundtrip_short_repeat_helper::<_, SignedEncoding>(value, count)?; - prop_assert_eq!(out, vec![value; count]); - } - - #[test] - fn roundtrip_short_repeat_i32(value: i32, count in 3_usize..=10) { - let out = roundtrip_short_repeat_helper::<_, SignedEncoding>(value, count)?; - prop_assert_eq!(out, vec![value; count]); - } - - #[test] - fn roundtrip_short_repeat_i64(value: i64, count in 3_usize..=10) { - let out = roundtrip_short_repeat_helper::<_, SignedEncoding>(value, count)?; - prop_assert_eq!(out, vec![value; count]); - } - - #[test] - fn roundtrip_short_repeat_i64_unsigned(value in 0..=i64::MAX, count in 3_usize..=10) { - let out = roundtrip_short_repeat_helper::<_, UnsignedEncoding>(value, count)?; - prop_assert_eq!(out, vec![value; count]); - } - } -} diff --git a/src/encoding/integer/util.rs b/src/encoding/integer/util.rs deleted file mode 100644 index f96e3ec0..00000000 --- a/src/encoding/integer/util.rs +++ /dev/null @@ -1,928 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::io::Read; - -use bytes::{BufMut, BytesMut}; -use num::Signed; -use snafu::OptionExt; - -use crate::{ - encoding::util::read_u8, - error::{Result, VarintTooLargeSnafu}, -}; - -use super::{EncodingSign, NInt, VarintSerde}; - -/// Extracting run length from first two header bytes. -/// -/// Run length encoded as range [0, 511], so adjust to actual -/// value in range [1, 512]. -/// -/// Used for patched base, delta, and direct sub-encodings. -pub fn extract_run_length_from_header(first_byte: u8, second_byte: u8) -> usize { - let length = ((first_byte as u16 & 0x01) << 8) | (second_byte as u16); - (length + 1) as usize -} - -/// Read bitpacked integers into provided buffer. `bit_size` can be any value from 1 to 64, -/// inclusive. -pub fn read_ints( - buffer: &mut Vec, - expected_no_of_ints: usize, - bit_size: usize, - r: &mut impl Read, -) -> Result<()> { - debug_assert!( - (1..=64).contains(&bit_size), - "bit_size must be in range [1, 64]" - ); - match bit_size { - 1 => unrolled_unpack_1(buffer, expected_no_of_ints, r), - 2 => unrolled_unpack_2(buffer, expected_no_of_ints, r), - 4 => unrolled_unpack_4(buffer, expected_no_of_ints, r), - n if n % 8 == 0 => unrolled_unpack_byte_aligned(buffer, expected_no_of_ints, r, n / 8), - n => unrolled_unpack_unaligned(buffer, expected_no_of_ints, r, n), - } -} - -/// Decode numbers with bit width of 1 from read stream -fn unrolled_unpack_1( - buffer: &mut Vec, - expected_num_of_ints: usize, - reader: &mut impl Read, -) -> Result<()> { - for _ in 0..(expected_num_of_ints / 8) { - let byte = read_u8(reader)?; - let nums = [ - (byte >> 7) & 1, - (byte >> 6) & 1, - (byte >> 5) & 1, - (byte >> 4) & 1, - (byte >> 3) & 1, - (byte >> 2) & 1, - (byte >> 1) & 1, - byte & 1, - ]; - buffer.extend(nums.map(N::from_u8)); - } - - // Less than full byte at end, extract these trailing numbers - let remainder = expected_num_of_ints % 8; - if remainder > 0 { - let byte = read_u8(reader)?; - for i in 0..remainder { - let shift = 7 - i; - let n = N::from_u8((byte >> shift) & 1); - buffer.push(n); - } - } - - Ok(()) -} - -/// Decode numbers with bit width of 2 from read stream -fn unrolled_unpack_2( - buffer: &mut Vec, - expected_num_of_ints: usize, - reader: &mut impl Read, -) -> Result<()> { - for _ in 0..(expected_num_of_ints / 4) { - let byte = read_u8(reader)?; - let nums = [(byte >> 6) & 3, (byte >> 4) & 3, (byte >> 2) & 3, byte & 3]; - buffer.extend(nums.map(N::from_u8)); - } - - // Less than full byte at end, extract these trailing numbers - let remainder = expected_num_of_ints % 4; - if remainder > 0 { - let byte = read_u8(reader)?; - for i in 0..remainder { - let shift = 6 - (i * 2); - let n = N::from_u8((byte >> shift) & 3); - buffer.push(n); - } - } - - Ok(()) -} - -/// Decode numbers with bit width of 4 from read stream -fn unrolled_unpack_4( - buffer: &mut Vec, - expected_num_of_ints: usize, - reader: &mut impl Read, -) -> Result<()> { - for _ in 0..(expected_num_of_ints 
/ 2) { - let byte = read_u8(reader)?; - let nums = [(byte >> 4) & 15, byte & 15]; - buffer.extend(nums.map(N::from_u8)); - } - - // At worst have 1 trailing 4-bit number - let remainder = expected_num_of_ints % 2; - if remainder > 0 { - let byte = read_u8(reader)?; - let n = N::from_u8((byte >> 4) & 15); - buffer.push(n); - } - - Ok(()) -} - -/// When the bitpacked integers have a width that isn't byte aligned -fn unrolled_unpack_unaligned( - buffer: &mut Vec, - expected_num_of_ints: usize, - reader: &mut impl Read, - bit_size: usize, -) -> Result<()> { - debug_assert!( - bit_size <= (N::BYTE_SIZE * 8), - "bit_size cannot exceed size of N" - ); - - let mut bits_left = 0; - let mut current_bits = N::zero(); - for _ in 0..expected_num_of_ints { - let mut result = N::zero(); - let mut bits_left_to_read = bit_size; - - // No bounds check as we rely on caller doing this check - // (since we know bit_size in advance) - - // bits_left_to_read and bits_left can never exceed 8 - // So safe to convert either to N - while bits_left_to_read > bits_left { - // TODO: explain this logic a bit - result <<= bits_left; - let mask = ((1_u16 << bits_left) - 1) as u8; - let mask = N::from_u8(mask); - result |= current_bits & mask; - bits_left_to_read -= bits_left; - - let byte = read_u8(reader)?; - current_bits = N::from_u8(byte); - - bits_left = 8; - } - - if bits_left_to_read > 0 { - result <<= bits_left_to_read; - bits_left -= bits_left_to_read; - let bits = current_bits >> bits_left; - let mask = ((1_u16 << bits_left_to_read) - 1) as u8; - let mask = N::from_u8(mask); - result |= bits & mask; - } - - buffer.push(result); - } - - Ok(()) -} - -/// Decode bitpacked integers which are byte aligned -#[inline] -fn unrolled_unpack_byte_aligned( - buffer: &mut Vec, - expected_num_of_ints: usize, - r: &mut impl Read, - num_bytes: usize, -) -> Result<()> { - debug_assert!( - num_bytes <= N::BYTE_SIZE, - "num_bytes cannot exceed size of integer being decoded into" - ); - // TODO: can probably read direct into buffer? read_big_endian() decodes - // into an intermediary buffer. - for _ in 0..expected_num_of_ints { - let num = N::read_big_endian(r, num_bytes)?; - buffer.push(num); - } - Ok(()) -} - -/// Write bit packed integers, where we expect the `bit_width` to be aligned -/// by [`get_closest_aligned_bit_width`], and we write the bytes as big endian. -pub fn write_aligned_packed_ints(writer: &mut BytesMut, bit_width: usize, values: &[N]) { - debug_assert!( - bit_width == 1 || bit_width == 2 || bit_width == 4 || bit_width % 8 == 0, - "bit_width must be 1, 2, 4 or a multiple of 8" - ); - match bit_width { - 1 => unrolled_pack_1(writer, values), - 2 => unrolled_pack_2(writer, values), - 4 => unrolled_pack_4(writer, values), - n => unrolled_pack_bytes(writer, n / 8, values), - } -} - -/// Similar to [`write_aligned_packed_ints`] but the `bit_width` allows any value -/// in the range `[1, 64]`. 
-pub fn write_packed_ints<N: NInt>(writer: &mut BytesMut, bit_width: usize, values: &[N]) {
-    debug_assert!(
-        (1..=64).contains(&bit_width),
-        "bit_width must be in the range [1, 64]"
-    );
-    if bit_width == 1 || bit_width == 2 || bit_width == 4 || bit_width % 8 == 0 {
-        write_aligned_packed_ints(writer, bit_width, values);
-    } else {
-        write_unaligned_packed_ints(writer, bit_width, values)
-    }
-}
-
-fn write_unaligned_packed_ints<N: NInt>(writer: &mut BytesMut, bit_width: usize, values: &[N]) {
-    debug_assert!(
-        (1..=64).contains(&bit_width),
-        "bit_width must be in the range [1, 64]"
-    );
-    let mut bits_left = 8;
-    let mut current_byte = 0;
-    for &value in values {
-        let mut bits_to_write = bit_width;
-        // This loop will write 8 bits at a time into current_byte, except for the
-        // first iteration after a previous value has been written. The previous
-        // value may have bits left over, still in current_byte, which is represented
-        // by 8 - bits_left (aka bits_left is the amount of space left in current_byte).
-        while bits_to_write > bits_left {
-            // Writing from most significant bits first.
-            let shift = bits_to_write - bits_left;
-            // Shift so bits to write are in least significant 8 bits.
-            // Masking out higher bits so conversion to u8 is safe.
-            let bits = value.unsigned_shr(shift as u32) & N::from_u8(0xFF);
-            current_byte |= bits.to_u8().unwrap();
-            bits_to_write -= bits_left;
-
-            writer.put_u8(current_byte);
-            current_byte = 0;
-            bits_left = 8;
-        }
-
-        // If there are trailing bits then include these into current_byte.
-        bits_left -= bits_to_write;
-        let bits = (value << bits_left) & N::from_u8(0xFF);
-        current_byte |= bits.to_u8().unwrap();
-
-        if bits_left == 0 {
-            writer.put_u8(current_byte);
-            current_byte = 0;
-            bits_left = 8;
-        }
-    }
-    // Flush any remaining bits
-    if bits_left != 8 {
-        writer.put_u8(current_byte);
-    }
-}
-
-fn unrolled_pack_1<N: NInt>(writer: &mut BytesMut, values: &[N]) {
-    let mut iter = values.chunks_exact(8);
-    for chunk in &mut iter {
-        let n1 = chunk[0].to_u8().unwrap() & 0x01;
-        let n2 = chunk[1].to_u8().unwrap() & 0x01;
-        let n3 = chunk[2].to_u8().unwrap() & 0x01;
-        let n4 = chunk[3].to_u8().unwrap() & 0x01;
-        let n5 = chunk[4].to_u8().unwrap() & 0x01;
-        let n6 = chunk[5].to_u8().unwrap() & 0x01;
-        let n7 = chunk[6].to_u8().unwrap() & 0x01;
-        let n8 = chunk[7].to_u8().unwrap() & 0x01;
-        let byte =
-            (n1 << 7) | (n2 << 6) | (n3 << 5) | (n4 << 4) | (n5 << 3) | (n6 << 2) | (n7 << 1) | n8;
-        writer.put_u8(byte);
-    }
-    let remainder = iter.remainder();
-    if !remainder.is_empty() {
-        let mut byte = 0;
-        for (i, n) in remainder.iter().enumerate() {
-            let n = n.to_u8().unwrap();
-            byte |= (n & 0x01) << (7 - i);
-        }
-        writer.put_u8(byte);
-    }
-}
-
-fn unrolled_pack_2<N: NInt>(writer: &mut BytesMut, values: &[N]) {
-    let mut iter = values.chunks_exact(4);
-    for chunk in &mut iter {
-        let n1 = chunk[0].to_u8().unwrap() & 0x03;
-        let n2 = chunk[1].to_u8().unwrap() & 0x03;
-        let n3 = chunk[2].to_u8().unwrap() & 0x03;
-        let n4 = chunk[3].to_u8().unwrap() & 0x03;
-        let byte = (n1 << 6) | (n2 << 4) | (n3 << 2) | n4;
-        writer.put_u8(byte);
-    }
-    let remainder = iter.remainder();
-    if !remainder.is_empty() {
-        let mut byte = 0;
-        for (i, n) in remainder.iter().enumerate() {
-            let n = n.to_u8().unwrap();
-            byte |= (n & 0x03) << (6 - i * 2);
-        }
-        writer.put_u8(byte);
-    }
-}
-
-fn unrolled_pack_4<N: NInt>(writer: &mut BytesMut, values: &[N]) {
-    let mut iter = values.chunks_exact(2);
-    for chunk in &mut iter {
-        let n1 = chunk[0].to_u8().unwrap() & 0x0F;
-        let n2 = chunk[1].to_u8().unwrap() & 0x0F;
-        let byte = (n1 << 4) | n2;
-        writer.put_u8(byte);
-    }
-    let remainder = iter.remainder();
-    if !remainder.is_empty() {
-        let byte = remainder[0].to_u8().unwrap() & 0x0F;
-        let byte = byte << 4;
-        writer.put_u8(byte);
-    }
-}
-
-fn unrolled_pack_bytes<N: NInt>(writer: &mut BytesMut, byte_size: usize, values: &[N]) {
-    for num in values {
-        let bytes = num.to_be_bytes();
-        let bytes = &bytes.as_ref()[N::BYTE_SIZE - byte_size..];
-        writer.put_slice(bytes);
-    }
-}
-
-/// Decoding table for RLEv2 sub-encodings bit width.
-///
-/// Used by Direct, Patched Base and Delta. By default this assumes non-delta
-/// (0 maps to 1), so Delta handles this discrepancy at the caller side.
-///
-/// Input must be a 5-bit integer (max value is 31).
-pub fn rle_v2_decode_bit_width(encoded: u8) -> usize {
-    debug_assert!(encoded < 32, "encoded bit width cannot exceed 5 bits");
-    match encoded {
-        0..=23 => encoded as usize + 1,
-        24 => 26,
-        25 => 28,
-        26 => 30,
-        27 => 32,
-        28 => 40,
-        29 => 48,
-        30 => 56,
-        31 => 64,
-        _ => unreachable!(),
-    }
-}
-
-/// Inverse of [`rle_v2_decode_bit_width`].
-///
-/// Assumes supported bit width is passed in. Will panic on invalid
-/// inputs that aren't defined in the ORC bit width encoding table
-/// (such as 50).
-pub fn rle_v2_encode_bit_width(width: usize) -> u8 {
-    debug_assert!(width <= 64, "bit width cannot exceed 64");
-    match width {
-        64 => 31,
-        56 => 30,
-        48 => 29,
-        40 => 28,
-        32 => 27,
-        30 => 26,
-        28 => 25,
-        26 => 24,
-        1..=24 => width as u8 - 1,
-        _ => unreachable!(),
-    }
-}
-
-pub fn get_closest_fixed_bits(n: usize) -> usize {
-    match n {
-        0 => 1,
-        1..=24 => n,
-        25..=26 => 26,
-        27..=28 => 28,
-        29..=30 => 30,
-        31..=32 => 32,
-        33..=40 => 40,
-        41..=48 => 48,
-        49..=56 => 56,
-        57..=64 => 64,
-        _ => unreachable!(),
-    }
-}
-
-pub fn encode_bit_width(n: usize) -> usize {
-    let n = get_closest_fixed_bits(n);
-    match n {
-        1..=24 => n - 1,
-        25..=26 => 24,
-        27..=28 => 25,
-        29..=30 => 26,
-        31..=32 => 27,
-        33..=40 => 28,
-        41..=48 => 29,
-        49..=56 => 30,
-        57..=64 => 31,
-        _ => unreachable!(),
-    }
-}
-
-fn decode_bit_width(n: usize) -> usize {
-    match n {
-        0..=23 => n + 1,
-        24 => 26,
-        25 => 28,
-        26 => 30,
-        27 => 32,
-        28 => 40,
-        29 => 48,
-        30 => 56,
-        31 => 64,
-        _ => unreachable!(),
-    }
-}
-
-/// Converts width of 64 bits or less to an aligned width, either rounding
-/// up to the nearest multiple of 8, or rounding up to 1, 2 or 4.
-pub fn get_closest_aligned_bit_width(width: usize) -> usize {
-    debug_assert!(width <= 64, "bit width cannot exceed 64");
-    match width {
-        0..=1 => 1,
-        2 => 2,
-        3..=4 => 4,
-        5..=8 => 8,
-        9..=16 => 16,
-        17..=24 => 24,
-        25..=32 => 32,
-        33..=40 => 40,
-        41..=48 => 48,
-        49..=56 => 56,
-        57..=64 => 64,
-        _ => unreachable!(),
-    }
-}
-
-/// Decode Base 128 Unsigned Varint
-fn read_varint<N: VarintSerde, R: Read>(reader: &mut R) -> Result<N> {
-    // Varints are encoded as sequence of bytes.
-    // Where the high bit of a byte is set to 1 if the varint
-    // continues into the next byte. Eventually it should terminate
-    // with a byte with high bit of 0.
-    let mut num = N::zero();
-    let mut offset = 0;
-    loop {
-        let byte = read_u8(reader)?;
-        let is_last_byte = byte & 0x80 == 0;
-        let without_continuation_bit = byte & 0x7F;
-        num |= N::from_u8(without_continuation_bit)
-            // Ensure we don't overflow
-            .checked_shl(offset)
-            .context(VarintTooLargeSnafu)?;
-        // Since high bit doesn't contribute to final number,
-        // we need to shift in multiples of 7 to account for this.
- offset += 7; - if is_last_byte { - break; - } - } - Ok(num) -} - -/// Encode Base 128 Unsigned Varint -fn write_varint(writer: &mut BytesMut, value: N) { - // Take max in case value = 0. - // Divide by 7 as high bit is always used as continuation flag. - let byte_size = value.bits_used().div_ceil(7).max(1); - // By default we'll have continuation bit set - // TODO: can probably do without Vec allocation? - let mut bytes = vec![0x80; byte_size]; - // Then just clear for the last one - let i = bytes.len() - 1; - bytes[i] = 0; - - // Encoding 7 bits at a time into bytes - let mask = N::from_u8(0x7F); - for (i, b) in bytes.iter_mut().enumerate() { - let shift = i * 7; - *b |= ((value >> shift) & mask).to_u8().unwrap(); - } - - writer.put_slice(&bytes); -} - -pub fn read_varint_zigzagged( - reader: &mut R, -) -> Result { - let unsigned = read_varint::(reader)?; - Ok(S::zigzag_decode(unsigned)) -} - -pub fn write_varint_zigzagged(writer: &mut BytesMut, value: N) { - let value = S::zigzag_encode(value); - write_varint(writer, value) -} - -/// Zigzag encoding stores the sign bit in the least significant bit. -#[inline] -pub fn signed_zigzag_decode(encoded: N) -> N { - let without_sign_bit = encoded.unsigned_shr(1); - let sign_bit = encoded & N::one(); - // If positive, sign_bit is 0 - // Negating 0 and doing bitwise XOR will just return without_sign_bit - // Since A ^ 0 = A - // If negative, sign_bit is 1 - // Negating turns to 11111...11 - // Then A ^ 1 = ~A (negating every bit in A) - without_sign_bit ^ -sign_bit -} - -/// Opposite of [`signed_zigzag_decode`]. -#[inline] -pub fn signed_zigzag_encode(value: N) -> N { - let l = N::BYTE_SIZE * 8 - 1; - (value << 1_usize) ^ (value >> l) -} - -/// MSB indicates if value is negated (1 if negative, else positive). Note we -/// take the MSB of the encoded number which might be smaller than N, hence -/// we need the encoded number byte size to find this MSB. -#[inline] -pub fn signed_msb_decode(encoded: N, encoded_byte_size: usize) -> N { - let msb_mask = N::one() << (encoded_byte_size * 8 - 1); - let is_positive = (encoded & msb_mask) == N::zero(); - let clean_sign_bit_mask = !msb_mask; - let encoded = encoded & clean_sign_bit_mask; - if is_positive { - encoded - } else { - -encoded - } -} - -/// Inverse of [`signed_msb_decode`]. -#[inline] -// TODO: bound this to only allow i64 input? might mess up for i32::MIN? -pub fn signed_msb_encode(value: N, encoded_byte_size: usize) -> N { - let is_signed = value.is_negative(); - // 0 if unsigned, 1 if signed - let sign_bit = N::from_u8(is_signed as u8); - let value = value.abs(); - let encoded_msb = sign_bit << (encoded_byte_size * 8 - 1); - encoded_msb | value -} - -/// Get the nth percentile, where input percentile must be in range (0.0, 1.0]. 
-pub fn calculate_percentile_bits(values: &[N], percentile: f32) -> usize { - debug_assert!( - percentile > 0.0 && percentile <= 1.0, - "percentile must be in range (0.0, 1.0]" - ); - - let mut histogram = [0; 32]; - for n in values { - // Map into range [0, 31] - let encoded_bit_width = encode_bit_width(n.bits_used()); - histogram[encoded_bit_width] += 1; - } - - // Then calculate the percentile here - let count = values.len() as f32; - let mut per_len = ((1.0 - percentile) * count) as usize; - for i in (0..32).rev() { - if let Some(a) = per_len.checked_sub(histogram[i]) { - per_len = a; - } else { - return decode_bit_width(i); - } - } - - // If percentile is in correct input range then we should always return above - unreachable!() -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::{ - encoding::integer::{SignedEncoding, UnsignedEncoding}, - error::Result, - }; - use proptest::prelude::*; - use std::io::Cursor; - - #[test] - fn test_zigzag_decode() { - assert_eq!(0, signed_zigzag_decode(0)); - assert_eq!(-1, signed_zigzag_decode(1)); - assert_eq!(1, signed_zigzag_decode(2)); - assert_eq!(-2, signed_zigzag_decode(3)); - assert_eq!(2, signed_zigzag_decode(4)); - assert_eq!(-3, signed_zigzag_decode(5)); - assert_eq!(3, signed_zigzag_decode(6)); - assert_eq!(-4, signed_zigzag_decode(7)); - assert_eq!(4, signed_zigzag_decode(8)); - assert_eq!(-5, signed_zigzag_decode(9)); - - assert_eq!(9_223_372_036_854_775_807, signed_zigzag_decode(-2_i64)); - assert_eq!(-9_223_372_036_854_775_808, signed_zigzag_decode(-1_i64)); - } - - #[test] - fn test_zigzag_encode() { - assert_eq!(0, signed_zigzag_encode(0)); - assert_eq!(1, signed_zigzag_encode(-1)); - assert_eq!(2, signed_zigzag_encode(1)); - assert_eq!(3, signed_zigzag_encode(-2)); - assert_eq!(4, signed_zigzag_encode(2)); - assert_eq!(5, signed_zigzag_encode(-3)); - assert_eq!(6, signed_zigzag_encode(3)); - assert_eq!(7, signed_zigzag_encode(-4)); - assert_eq!(8, signed_zigzag_encode(4)); - assert_eq!(9, signed_zigzag_encode(-5)); - - assert_eq!(-2_i64, signed_zigzag_encode(9_223_372_036_854_775_807)); - assert_eq!(-1_i64, signed_zigzag_encode(-9_223_372_036_854_775_808)); - } - - #[test] - fn roundtrip_zigzag_edge_cases() { - let value = 0_i16; - assert_eq!(signed_zigzag_decode(signed_zigzag_encode(value)), value); - let value = i16::MAX; - assert_eq!(signed_zigzag_decode(signed_zigzag_encode(value)), value); - - let value = 0_i32; - assert_eq!(signed_zigzag_decode(signed_zigzag_encode(value)), value); - let value = i32::MAX; - assert_eq!(signed_zigzag_decode(signed_zigzag_encode(value)), value); - let value = i32::MIN; - assert_eq!(signed_zigzag_decode(signed_zigzag_encode(value)), value); - - let value = 0_i64; - assert_eq!(signed_zigzag_decode(signed_zigzag_encode(value)), value); - let value = i64::MAX; - assert_eq!(signed_zigzag_decode(signed_zigzag_encode(value)), value); - let value = i64::MIN; - assert_eq!(signed_zigzag_decode(signed_zigzag_encode(value)), value); - } - - proptest! 
{ - #[test] - fn roundtrip_zigzag_i16(value: i16) { - let out = signed_zigzag_decode(signed_zigzag_encode(value)); - prop_assert_eq!(value, out); - } - - #[test] - fn roundtrip_zigzag_i32(value: i32) { - let out = signed_zigzag_decode(signed_zigzag_encode(value)); - prop_assert_eq!(value, out); - } - - #[test] - fn roundtrip_zigzag_i64(value: i64) { - let out = signed_zigzag_decode(signed_zigzag_encode(value)); - prop_assert_eq!(value, out); - } - } - - fn generate_msb_test_value( - seed_value: N, - byte_size: usize, - signed: bool, - ) -> N { - // We mask out to values that can fit within the specified byte_size. - let shift = (N::BYTE_SIZE - byte_size) * 8; - let mask = N::max_value().unsigned_shr(shift as u32); - // And remove the msb since we manually set a value to signed based on the signed parameter. - let mask = mask >> 1; - let value = seed_value & mask; - // This guarantees values that can fit within byte_size when they are msb encoded, both - // signed and unsigned. - if signed { - -value - } else { - value - } - } - - #[test] - fn roundtrip_msb_edge_cases() { - // Testing all cases of max values for byte_size + signed combinations - for byte_size in 1..=2 { - for signed in [true, false] { - let value = generate_msb_test_value(i16::MAX, byte_size, signed); - let out = signed_msb_decode(signed_msb_encode(value, byte_size), byte_size); - assert_eq!(value, out); - } - } - - for byte_size in 1..=4 { - for signed in [true, false] { - let value = generate_msb_test_value(i32::MAX, byte_size, signed); - let out = signed_msb_decode(signed_msb_encode(value, byte_size), byte_size); - assert_eq!(value, out); - } - } - - for byte_size in 1..=8 { - for signed in [true, false] { - let value = generate_msb_test_value(i64::MAX, byte_size, signed); - let out = signed_msb_decode(signed_msb_encode(value, byte_size), byte_size); - assert_eq!(value, out); - } - } - } - - proptest! { - #[test] - fn roundtrip_msb_i16(value: i16, byte_size in 1..=2_usize, signed: bool) { - let value = generate_msb_test_value(value, byte_size, signed); - let out = signed_msb_decode(signed_msb_encode(value, byte_size), byte_size); - prop_assert_eq!(value, out); - } - - #[test] - fn roundtrip_msb_i32(value: i32, byte_size in 1..=4_usize, signed: bool) { - let value = generate_msb_test_value(value, byte_size, signed); - let out = signed_msb_decode(signed_msb_encode(value, byte_size), byte_size); - prop_assert_eq!(value, out); - } - - #[test] - fn roundtrip_msb_i64(value: i64, byte_size in 1..=8_usize, signed: bool) { - let value = generate_msb_test_value(value, byte_size, signed); - let out = signed_msb_decode(signed_msb_encode(value, byte_size), byte_size); - prop_assert_eq!(value, out); - } - } - - #[test] - fn test_read_varint() -> Result<()> { - fn test_assert(serialized: &[u8], expected: i64) -> Result<()> { - let mut reader = Cursor::new(serialized); - assert_eq!( - expected, - read_varint_zigzagged::(&mut reader)? 
- ); - Ok(()) - } - - test_assert(&[0x00], 0)?; - test_assert(&[0x01], 1)?; - test_assert(&[0x7f], 127)?; - test_assert(&[0x80, 0x01], 128)?; - test_assert(&[0x81, 0x01], 129)?; - test_assert(&[0xff, 0x7f], 16_383)?; - test_assert(&[0x80, 0x80, 0x01], 16_384)?; - test_assert(&[0x81, 0x80, 0x01], 16_385)?; - - // when too large - let err = read_varint_zigzagged::(&mut Cursor::new(&[ - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01, - ])); - assert!(err.is_err()); - assert_eq!( - "Varint being decoded is too large", - err.unwrap_err().to_string() - ); - - // when unexpected end to stream - let err = - read_varint_zigzagged::(&mut Cursor::new(&[0x80, 0x80])); - assert!(err.is_err()); - assert_eq!( - "Failed to read, source: failed to fill whole buffer", - err.unwrap_err().to_string() - ); - - Ok(()) - } - - fn roundtrip_varint(value: N) -> N { - let mut buf = BytesMut::new(); - write_varint_zigzagged::(&mut buf, value); - read_varint_zigzagged::(&mut Cursor::new(&buf)).unwrap() - } - - proptest! { - #[test] - fn roundtrip_varint_i16(value: i16) { - let out = roundtrip_varint::<_, SignedEncoding>(value); - prop_assert_eq!(out, value); - } - - #[test] - fn roundtrip_varint_i32(value: i32) { - let out = roundtrip_varint::<_, SignedEncoding>(value); - prop_assert_eq!(out, value); - } - - #[test] - fn roundtrip_varint_i64(value: i64) { - let out = roundtrip_varint::<_, SignedEncoding>(value); - prop_assert_eq!(out, value); - } - - #[test] - fn roundtrip_varint_i128(value: i128) { - let out = roundtrip_varint::<_, SignedEncoding>(value); - prop_assert_eq!(out, value); - } - - #[test] - fn roundtrip_varint_u64(value in 0..=i64::MAX) { - let out = roundtrip_varint::<_, UnsignedEncoding>(value); - prop_assert_eq!(out, value); - } - } - - #[test] - fn roundtrip_varint_edge_cases() { - let value = 0_i16; - assert_eq!(roundtrip_varint::<_, SignedEncoding>(value), value); - let value = i16::MIN; - assert_eq!(roundtrip_varint::<_, SignedEncoding>(value), value); - let value = i16::MAX; - assert_eq!(roundtrip_varint::<_, SignedEncoding>(value), value); - - let value = 0_i32; - assert_eq!(roundtrip_varint::<_, SignedEncoding>(value), value); - let value = i32::MIN; - assert_eq!(roundtrip_varint::<_, SignedEncoding>(value), value); - let value = i32::MAX; - assert_eq!(roundtrip_varint::<_, SignedEncoding>(value), value); - - let value = 0_i64; - assert_eq!(roundtrip_varint::<_, SignedEncoding>(value), value); - let value = i64::MIN; - assert_eq!(roundtrip_varint::<_, SignedEncoding>(value), value); - let value = i64::MAX; - assert_eq!(roundtrip_varint::<_, SignedEncoding>(value), value); - - let value = 0_i128; - assert_eq!(roundtrip_varint::<_, SignedEncoding>(value), value); - let value = i128::MIN; - assert_eq!(roundtrip_varint::<_, SignedEncoding>(value), value); - let value = i128::MAX; - assert_eq!(roundtrip_varint::<_, SignedEncoding>(value), value); - } - - /// Easier to generate values then bound them, instead of generating correctly bounded - /// values. In this case, bounds are that no value will exceed the `bit_width` in terms - /// of bit size. 
- fn mask_to_bit_width(values: &[N], bit_width: usize) -> Vec { - let shift = N::BYTE_SIZE * 8 - bit_width; - let mask = N::max_value().unsigned_shr(shift as u32); - values.iter().map(|&v| v & mask).collect() - } - - fn roundtrip_packed_ints_serde(values: &[N], bit_width: usize) -> Result> { - let mut buf = BytesMut::new(); - let mut out = vec![]; - write_packed_ints(&mut buf, bit_width, values); - read_ints(&mut out, values.len(), bit_width, &mut Cursor::new(buf))?; - Ok(out) - } - - proptest! { - #[test] - fn roundtrip_packed_ints_serde_i64( - values in prop::collection::vec(any::(), 1..=512), - bit_width in 1..=64_usize - ) { - let values = mask_to_bit_width(&values, bit_width); - let out = roundtrip_packed_ints_serde(&values, bit_width)?; - prop_assert_eq!(out, values); - } - - #[test] - fn roundtrip_packed_ints_serde_i32( - values in prop::collection::vec(any::(), 1..=512), - bit_width in 1..=32_usize - ) { - let values = mask_to_bit_width(&values, bit_width); - let out = roundtrip_packed_ints_serde(&values, bit_width)?; - prop_assert_eq!(out, values); - } - - #[test] - fn roundtrip_packed_ints_serde_i16( - values in prop::collection::vec(any::(), 1..=512), - bit_width in 1..=16_usize - ) { - let values = mask_to_bit_width(&values, bit_width); - let out = roundtrip_packed_ints_serde(&values, bit_width)?; - prop_assert_eq!(out, values); - } - } -} diff --git a/src/encoding/mod.rs b/src/encoding/mod.rs deleted file mode 100644 index 871ae0b0..00000000 --- a/src/encoding/mod.rs +++ /dev/null @@ -1,157 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Encoding/decoding logic for writing/reading primitive values from ORC types. - -use arrow::buffer::NullBuffer; -use bytes::Bytes; - -use crate::{error::Result, memory::EstimateMemory}; - -pub mod boolean; -pub mod byte; -pub mod decimal; -pub mod float; -pub mod integer; -mod rle; -pub mod timestamp; -mod util; - -/// Encodes primitive values into an internal buffer, usually with a specialized run length -/// encoding for better compression. -pub trait PrimitiveValueEncoder: EstimateMemory -where - V: Copy, -{ - fn new() -> Self; - - fn write_one(&mut self, value: V); - - fn write_slice(&mut self, values: &[V]) { - for &value in values { - self.write_one(value); - } - } - - /// Take the encoded bytes, replacing it with an empty buffer. - // TODO: Figure out how to retain the allocation instead of handing - // it off each time. - fn take_inner(&mut self) -> Bytes; -} - -pub trait PrimitiveValueDecoder { - /// Decode out.len() values into out at a time, failing if it cannot fill - /// the buffer. - fn decode(&mut self, out: &mut [V]) -> Result<()>; - - /// Decode into `out` according to the `true` elements in `present`. - /// - /// `present` must be the same length as `out`. 
- fn decode_spaced(&mut self, out: &mut [V], present: &NullBuffer) -> Result<()> { - debug_assert_eq!(out.len(), present.len()); - - // First get all the non-null values into a contiguous range. - let non_null_count = present.len() - present.null_count(); - if non_null_count == 0 { - // All nulls, don't bother decoding anything - return Ok(()); - } - // We read into the back because valid_indices() below is not reversible, - // so we just reverse our algorithm. - let range_start = out.len() - non_null_count; - self.decode(&mut out[range_start..])?; - if non_null_count == present.len() { - // No nulls, don't need to space out - return Ok(()); - } - - // From the head of the contiguous range (at the end of the buffer) we swap - // with the null elements to ensure it matches with the present buffer. - let head_indices = range_start..out.len(); - for (correct_index, head_index) in present.valid_indices().zip(head_indices) { - // head_index points to the value we need to move to correct_index - out.swap(correct_index, head_index); - } - - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use proptest::prelude::*; - - use super::*; - - /// Emits numbers increasing from 0. - struct DummyDecoder; - - impl PrimitiveValueDecoder for DummyDecoder { - fn decode(&mut self, out: &mut [i32]) -> Result<()> { - let values = (0..out.len()).map(|x| x as i32).collect::>(); - out.copy_from_slice(&values); - Ok(()) - } - } - - fn gen_spaced_dummy_decoder_expected(present: &[bool]) -> Vec { - let mut value = 0; - let mut expected = vec![]; - for &is_present in present { - if is_present { - expected.push(value); - value += 1; - } else { - expected.push(-1); - } - } - expected - } - - proptest! { - #[test] - fn decode_spaced_proptest(present: Vec) { - let mut decoder = DummyDecoder; - let mut out = vec![-1; present.len()]; - decoder.decode_spaced(&mut out, &NullBuffer::from(present.clone())).unwrap(); - let expected = gen_spaced_dummy_decoder_expected(&present); - prop_assert_eq!(out, expected); - } - } - - #[test] - fn decode_spaced_edge_cases() { - let mut decoder = DummyDecoder; - let len = 10; - - // all present - let mut out = vec![-1; len]; - let present = vec![true; len]; - let present = NullBuffer::from(present); - decoder.decode_spaced(&mut out, &present).unwrap(); - let expected: Vec<_> = (0..len).map(|i| i as i32).collect(); - assert_eq!(out, expected); - - // all null - let mut out = vec![-1; len]; - let present = vec![false; len]; - let present = NullBuffer::from(present); - decoder.decode_spaced(&mut out, &present).unwrap(); - let expected = vec![-1; len]; - assert_eq!(out, expected); - } -} diff --git a/src/encoding/rle.rs b/src/encoding/rle.rs deleted file mode 100644 index a330efa1..00000000 --- a/src/encoding/rle.rs +++ /dev/null @@ -1,99 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
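The decode_spaced implementation deleted above parks the decoded values in a contiguous block at the back of the output buffer, then swaps each one forward into the positions marked valid. The same trick without Arrow's NullBuffer, using a plain bool slice and pre-decoded values as hypothetical stand-ins for the real decoder:

fn space_out(out: &mut [i32], present: &[bool], non_null: &[i32]) {
    assert_eq!(out.len(), present.len());
    // 1. Park the decoded values as one contiguous block at the back.
    let range_start = out.len() - non_null.len();
    out[range_start..].copy_from_slice(non_null);
    // 2. Walk the valid slots left to right, pulling each value forward.
    //    The i-th valid slot is never past range_start + i, so later values
    //    at the back are never disturbed.
    let valid_positions = present
        .iter()
        .enumerate()
        .filter(|&(_, &p)| p)
        .map(|(i, _)| i);
    for (correct_index, head_index) in valid_positions.zip(range_start..) {
        out.swap(correct_index, head_index);
    }
}

fn main() {
    let present = [true, false, true, true, false];
    let mut out = vec![-1; present.len()]; // -1 marks null slots
    space_out(&mut out, &present, &[10, 20, 30]);
    assert_eq!(out, vec![10, -1, 20, 30, -1]);
}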
See the License for the -// specific language governing permissions and limitations -// under the License. - -use crate::error::{OutOfSpecSnafu, Result}; - -use super::PrimitiveValueDecoder; - -mod sealed { - use std::io::Read; - - use crate::encoding::{ - byte::ByteRleDecoder, - integer::{rle_v1::RleV1Decoder, rle_v2::RleV2Decoder, EncodingSign, NInt}, - }; - - pub trait Rle {} - - impl Rle for ByteRleDecoder {} - impl Rle for RleV1Decoder {} - impl Rle for RleV2Decoder {} -} - -/// Generic decoding behaviour for run length encoded values, such as integers (v1 and v2) -/// and bytes. -/// -/// Assumes an internal buffer which acts like a (single headed) queue where values are first -/// decoded into, before being copied out into the output buffer (usually an Arrow array). -pub trait GenericRle { - /// Consume N elements from internal buffer to signify the values having been copied out. - fn advance(&mut self, n: usize); - - /// All values available in internal buffer, respecting the current advancement level. - fn available(&self) -> &[V]; - - /// This should clear the internal buffer and populate it with the next round of decoded - /// values. - // TODO: Have a version that copies directly into the output buffer (e.g. Arrow array). - // Currently we always decode to the internal buffer first, even if we can copy - // directly to the output and skip the middle man. Ideally the internal buffer - // should only be used for leftovers between calls to PrimitiveValueDecoder::decode. - fn decode_batch(&mut self) -> Result<()>; -} - -impl + sealed::Rle> PrimitiveValueDecoder for G { - fn decode(&mut self, out: &mut [V]) -> Result<()> { - let available = self.available(); - // If we have enough leftover to copy, can skip decoding more. - if available.len() >= out.len() { - out.copy_from_slice(&available[..out.len()]); - self.advance(out.len()); - return Ok(()); - } - - // Otherwise progressively decode and copy over chunks. - let len_to_copy = out.len(); - let mut copied = 0; - while copied < len_to_copy { - if self.available().is_empty() { - self.decode_batch()?; - } - - let copying = self.available().len(); - // At most, we fill to exact length of output buffer (don't overflow). - let copying = copying.min(len_to_copy - copied); - - let out = &mut out[copied..]; - out[..copying].copy_from_slice(&self.available()[..copying]); - - copied += copying; - self.advance(copying); - } - - // We always expect to be able to fill the output buffer; it is up to the - // caller to control that size. - if copied != out.len() { - // TODO: more descriptive error - OutOfSpecSnafu { - msg: "Array length less than expected", - } - .fail() - } else { - Ok(()) - } - } -} diff --git a/src/encoding/timestamp.rs b/src/encoding/timestamp.rs deleted file mode 100644 index d0113111..00000000 --- a/src/encoding/timestamp.rs +++ /dev/null @@ -1,183 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
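The blanket PrimitiveValueDecoder impl above drains an internal queue across batch boundaries. A toy version of that discipline, with a counting "decoder" standing in for the real RLE readers and an assumed fixed batch size of 4:

struct ToyRle {
    buf: Vec<u32>,
    pos: usize, // how far the internal buffer has been drained
    next: u32,  // next value the fake decoder will produce
}

impl ToyRle {
    fn available(&self) -> &[u32] {
        &self.buf[self.pos..]
    }
    fn decode_batch(&mut self) {
        // Refill the internal buffer with the next batch of decoded values.
        self.buf.clear();
        self.pos = 0;
        for _ in 0..4 {
            self.buf.push(self.next);
            self.next += 1;
        }
    }
    fn decode(&mut self, out: &mut [u32]) {
        let mut copied = 0;
        while copied < out.len() {
            if self.available().is_empty() {
                self.decode_batch();
            }
            // Copy at most what is available, never past the output buffer.
            let n = self.available().len().min(out.len() - copied);
            out[copied..copied + n].copy_from_slice(&self.available()[..n]);
            self.pos += n; // advance(n)
            copied += n;
        }
    }
}

fn main() {
    let mut rle = ToyRle { buf: vec![], pos: 0, next: 0 };
    let mut out = vec![0u32; 6];
    rle.decode(&mut out); // spans two internal batches of 4
    assert_eq!(out, vec![0, 1, 2, 3, 4, 5]);
    let mut out = vec![0u32; 2];
    rle.decode(&mut out); // served from the second batch's leftovers
    assert_eq!(out, vec![6, 7]);
}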
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::marker::PhantomData; - -use arrow::datatypes::{ArrowTimestampType, TimeUnit}; -use snafu::ensure; - -use crate::{ - encoding::PrimitiveValueDecoder, - error::{DecodeTimestampSnafu, Result}, -}; - -const NANOSECONDS_IN_SECOND: i64 = 1_000_000_000; - -pub struct TimestampDecoder { - base_from_epoch: i64, - data: Box + Send>, - secondary: Box + Send>, - _marker: PhantomData, -} - -impl TimestampDecoder { - pub fn new( - base_from_epoch: i64, - data: Box + Send>, - secondary: Box + Send>, - ) -> Self { - Self { - base_from_epoch, - data, - secondary, - _marker: PhantomData, - } - } -} - -impl PrimitiveValueDecoder for TimestampDecoder { - fn decode(&mut self, out: &mut [T::Native]) -> Result<()> { - // TODO: can probably optimize, reuse buffers? - let mut data = vec![0; out.len()]; - let mut secondary = vec![0; out.len()]; - self.data.decode(&mut data)?; - self.secondary.decode(&mut secondary)?; - for (index, (&seconds_since_orc_base, &nanoseconds)) in - data.iter().zip(secondary.iter()).enumerate() - { - out[index] = - decode_timestamp::(self.base_from_epoch, seconds_since_orc_base, nanoseconds)?; - } - Ok(()) - } -} - -/// Arrow TimestampNanosecond type cannot represent the full datetime range of -/// the ORC Timestamp type, so this iterator provides the ability to decode the -/// raw nanoseconds without restricting it to the Arrow TimestampNanosecond range. -pub struct TimestampNanosecondAsDecimalDecoder { - base_from_epoch: i64, - data: Box + Send>, - secondary: Box + Send>, -} - -impl TimestampNanosecondAsDecimalDecoder { - pub fn new( - base_from_epoch: i64, - data: Box + Send>, - secondary: Box + Send>, - ) -> Self { - Self { - base_from_epoch, - data, - secondary, - } - } -} - -impl PrimitiveValueDecoder for TimestampNanosecondAsDecimalDecoder { - fn decode(&mut self, out: &mut [i128]) -> Result<()> { - // TODO: can probably optimize, reuse buffers? - let mut data = vec![0; out.len()]; - let mut secondary = vec![0; out.len()]; - self.data.decode(&mut data)?; - self.secondary.decode(&mut secondary)?; - for (index, (&seconds_since_orc_base, &nanoseconds)) in - data.iter().zip(secondary.iter()).enumerate() - { - out[index] = - decode_timestamp_as_i128(self.base_from_epoch, seconds_since_orc_base, nanoseconds); - } - Ok(()) - } -} - -fn decode(base: i64, seconds_since_orc_base: i64, nanoseconds: i64) -> (i128, i64, u64) { - let data = seconds_since_orc_base; - // TODO: is this a safe cast? - let mut nanoseconds = nanoseconds as u64; - // Last 3 bits indicate how many trailing zeros were truncated - let zeros = nanoseconds & 0x7; - nanoseconds >>= 3; - // Multiply by powers of 10 to get back the trailing zeros - // TODO: would it be more efficient to unroll this? 
(if LLVM doesn't already do so) - if zeros != 0 { - nanoseconds *= 10_u64.pow(zeros as u32 + 1); - } - let seconds_since_epoch = data + base; - // Timestamps below the UNIX epoch with nanoseconds > 999_999 need to be - // adjusted to have 1 second subtracted due to ORC-763: - // https://issues.apache.org/jira/browse/ORC-763 - let seconds = if seconds_since_epoch < 0 && nanoseconds > 999_999 { - seconds_since_epoch - 1 - } else { - seconds_since_epoch - }; - // Convert into nanoseconds since epoch, which Arrow uses as native representation - // of timestamps - // The timestamp may overflow i64 as ORC encodes them as a pair of (seconds, nanoseconds) - // while we encode them as a single i64 of nanoseconds in Arrow. - let nanoseconds_since_epoch = - (seconds as i128 * NANOSECONDS_IN_SECOND as i128) + (nanoseconds as i128); - (nanoseconds_since_epoch, seconds, nanoseconds) -} - -fn decode_timestamp( - base: i64, - seconds_since_orc_base: i64, - nanoseconds: i64, -) -> Result { - let (nanoseconds_since_epoch, seconds, nanoseconds) = - decode(base, seconds_since_orc_base, nanoseconds); - - let nanoseconds_in_timeunit = match T::UNIT { - TimeUnit::Second => 1_000_000_000, - TimeUnit::Millisecond => 1_000_000, - TimeUnit::Microsecond => 1_000, - TimeUnit::Nanosecond => 1, - }; - - // Error if loss of precision - // TODO: make this configurable (e.g. can succeed but truncate) - ensure!( - nanoseconds_since_epoch % nanoseconds_in_timeunit == 0, - DecodeTimestampSnafu { - seconds, - nanoseconds, - to_time_unit: T::UNIT, - } - ); - - // Convert to i64 and error if overflow - let num_since_epoch = (nanoseconds_since_epoch / nanoseconds_in_timeunit) - .try_into() - .or_else(|_| { - DecodeTimestampSnafu { - seconds, - nanoseconds, - to_time_unit: T::UNIT, - } - .fail() - })?; - - Ok(num_since_epoch) -} - -fn decode_timestamp_as_i128(base: i64, seconds_since_orc_base: i64, nanoseconds: i64) -> i128 { - let (nanoseconds_since_epoch, _, _) = decode(base, seconds_since_orc_base, nanoseconds); - nanoseconds_since_epoch -} diff --git a/src/encoding/util.rs b/src/encoding/util.rs deleted file mode 100644 index 862d12fb..00000000 --- a/src/encoding/util.rs +++ /dev/null @@ -1,38 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::io::Read; - -use snafu::ResultExt; - -use crate::error::{self, Result}; - -/// Read single byte. -#[inline] -pub fn read_u8(reader: &mut impl Read) -> Result { - let mut byte = [0]; - reader.read_exact(&mut byte).context(error::IoSnafu)?; - Ok(byte[0]) -} - -/// Like [`read_u8()`] but returns `Ok(None)` if reader has reached EOF. 
-#[inline] -pub fn try_read_u8(reader: &mut impl Read) -> Result> { - let mut byte = [0]; - let length = reader.read(&mut byte).context(error::IoSnafu)?; - Ok((length > 0).then_some(byte[0])) -} diff --git a/src/error.rs b/src/error.rs deleted file mode 100644 index 02713e20..00000000 --- a/src/error.rs +++ /dev/null @@ -1,176 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::io; - -use arrow::datatypes::DataType as ArrowDataType; -use arrow::datatypes::TimeUnit; -use arrow::error::ArrowError; -use snafu::prelude::*; -use snafu::Location; - -use crate::proto; -use crate::schema::DataType; - -// TODO: consolidate error types? better to have a smaller set? -#[derive(Debug, Snafu)] -#[snafu(visibility(pub))] -pub enum OrcError { - #[snafu(display("Failed to read, source: {}", source))] - IoError { - source: std::io::Error, - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display("Empty file"))] - EmptyFile { - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display("Out of spec, message: {}", msg))] - OutOfSpec { - msg: String, - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display("Failed to decode float, source: {}", source))] - DecodeFloat { - #[snafu(implicit)] - location: Location, - source: std::io::Error, - }, - - #[snafu(display( - "Overflow while decoding timestamp (seconds={}, nanoseconds={}) to {:?}", - seconds, - nanoseconds, - to_time_unit, - ))] - DecodeTimestamp { - #[snafu(implicit)] - location: Location, - seconds: i64, - nanoseconds: u64, - to_time_unit: TimeUnit, - }, - - #[snafu(display("Failed to decode proto, source: {}", source))] - DecodeProto { - #[snafu(implicit)] - location: Location, - source: prost::DecodeError, - }, - - #[snafu(display("No types found"))] - NoTypes { - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display("unsupported type variant: {}", msg))] - UnsupportedTypeVariant { - #[snafu(implicit)] - location: Location, - msg: &'static str, - }, - - #[snafu(display( - "Cannot decode ORC type {:?} into Arrow type {:?}", - orc_type, - arrow_type, - ))] - MismatchedSchema { - #[snafu(implicit)] - location: Location, - orc_type: DataType, - arrow_type: ArrowDataType, - }, - - #[snafu(display("Invalid encoding for column '{}': {:?}", name, encoding))] - InvalidColumnEncoding { - #[snafu(implicit)] - location: Location, - name: String, - encoding: proto::column_encoding::Kind, - }, - - #[snafu(display("Failed to convert to record batch: {}", source))] - ConvertRecordBatch { - #[snafu(implicit)] - location: Location, - source: ArrowError, - }, - - #[snafu(display("Varint being decoded is too large"))] - VarintTooLarge { - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display("unexpected: {}", msg))] - Unexpected { - #[snafu(implicit)] - 
location: Location, - msg: String, - }, - - #[snafu(display("Failed to build zstd decoder: {}", source))] - BuildZstdDecoder { - #[snafu(implicit)] - location: Location, - source: io::Error, - }, - - #[snafu(display("Failed to build snappy decoder: {}", source))] - BuildSnappyDecoder { - #[snafu(implicit)] - location: Location, - source: snap::Error, - }, - - #[snafu(display("Failed to build lzo decoder: {}", source))] - BuildLzoDecoder { - #[snafu(implicit)] - location: Location, - source: lzokay_native::Error, - }, - - #[snafu(display("Failed to build lz4 decoder: {}", source))] - BuildLz4Decoder { - #[snafu(implicit)] - location: Location, - source: lz4_flex::block::DecompressError, - }, - - #[snafu(display("Arrow error: {}", source))] - Arrow { - source: arrow::error::ArrowError, - #[snafu(implicit)] - location: Location, - }, -} - -pub type Result = std::result::Result; - -impl From for ArrowError { - fn from(value: OrcError) -> Self { - ArrowError::ExternalError(Box::new(value)) - } -} diff --git a/src/datafusion/file_format.rs b/src/file_format.rs similarity index 87% rename from src/datafusion/file_format.rs rename to src/file_format.rs index 5e0f5a76..8cacb70d 100644 --- a/src/datafusion/file_format.rs +++ b/src/file_format.rs @@ -20,17 +20,18 @@ use std::collections::HashMap; use std::fmt::Debug; use std::sync::Arc; -use crate::reader::metadata::read_metadata_async; use arrow::datatypes::Schema; use datafusion::arrow::datatypes::SchemaRef; use datafusion::common::Statistics; +use datafusion::datasource::file_format::file_compression_type::FileCompressionType; use datafusion::datasource::file_format::FileFormat; use datafusion::datasource::physical_plan::FileScanConfig; -use datafusion::error::Result; +use datafusion::error::{DataFusionError, Result}; use datafusion::execution::context::SessionState; use datafusion::physical_plan::ExecutionPlan; use datafusion_physical_expr::PhysicalExpr; use futures::TryStreamExt; +use orc_rust::reader::metadata::read_metadata_async; use async_trait::async_trait; use futures_util::StreamExt; @@ -43,7 +44,9 @@ use super::physical_exec::OrcExec; async fn fetch_schema(store: &Arc, file: &ObjectMeta) -> Result<(Path, Schema)> { let loc_path = file.location.clone(); let mut reader = ObjectStoreReader::new(Arc::clone(store), file.clone()); - let metadata = read_metadata_async(&mut reader).await?; + let metadata = read_metadata_async(&mut reader) + .await + .map_err(|e| DataFusionError::External(Box::new(e)))?; let schema = metadata .root_data_type() .create_arrow_schema(&HashMap::default()); @@ -65,6 +68,14 @@ impl FileFormat for OrcFormat { self } + fn get_ext(&self) -> String { + "orc".to_string() + } + + fn get_ext_with_compression(&self, _compression: &FileCompressionType) -> Result { + Ok("orc".to_string()) + } + async fn infer_schema( &self, state: &SessionState, diff --git a/src/lib.rs b/src/lib.rs index 6503a960..86c3ca0b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,62 +15,197 @@ // specific language governing permissions and limitations // under the License. -//! A native Rust implementation of the [Apache ORC](https://orc.apache.org) file format, -//! providing API's to read data into [Apache Arrow](https://arrow.apache.org) in-memory arrays. +//! Integration with [Apache DataFusion](https://datafusion.apache.org/) query engine to +//! allow querying ORC files with a SQL/DataFrame API. //! -//! # Example read usage +//! # Example usage //! //! ```no_run -//! # use std::fs::File; -//! # use orc_rust::arrow_reader::ArrowReaderBuilder; -//! 
let file = File::open("/path/to/file.orc").unwrap(); -//! let reader = ArrowReaderBuilder::try_new(file).unwrap().build(); -//! let record_batches = reader.collect::, _>>().unwrap(); -//! ``` -//! -//! # Example write usage +//! # use datafusion::prelude::*; +//! # use datafusion::error::Result; +//! # use orc_rust::datafusion::{OrcReadOptions, SessionContextOrcExt}; +//! # #[tokio::main] +//! # async fn main() -> Result<()> { +//! let ctx = SessionContext::new(); +//! ctx.register_orc( +//! "table1", +//! "/path/to/file.orc", +//! OrcReadOptions::default(), +//! ) +//! .await?; //! -//! ```no_run -//! # use std::fs::File; -//! # use arrow::array::RecordBatch; -//! # use orc_rust::arrow_writer::ArrowWriterBuilder; -//! # fn get_record_batch() -> RecordBatch { -//! # unimplemented!() +//! ctx.sql("select a, b from table1") +//! .await? +//! .show() +//! .await?; +//! # Ok(()) //! # } -//! let file = File::create("/path/to/file.orc").unwrap(); -//! let batch = get_record_batch(); -//! let mut writer = ArrowWriterBuilder::new(file, batch.schema()) -//! .try_build() -//! .unwrap(); -//! writer.write(&batch).unwrap(); -//! writer.close().unwrap(); //! ``` -//! -//! See the [`datafusion`] module for information on how to integrate with -//! [Apache DataFusion](https://datafusion.apache.org/). - -mod array_decoder; -pub mod arrow_reader; -pub mod arrow_writer; -#[cfg(feature = "async")] -pub mod async_arrow_reader; -mod column; -pub mod compression; -mod encoding; -pub mod error; -mod memory; -pub mod projection; -mod proto; -pub mod reader; -pub mod schema; -pub mod statistics; -pub mod stripe; -mod writer; - -pub use arrow_reader::{ArrowReader, ArrowReaderBuilder}; -pub use arrow_writer::{ArrowWriter, ArrowWriterBuilder}; -#[cfg(feature = "async")] -pub use async_arrow_reader::ArrowStreamReader; - -#[cfg(feature = "datafusion")] -pub mod datafusion; + +use std::sync::Arc; + +use datafusion::arrow::datatypes::SchemaRef; +use datafusion::common::exec_err; +use datafusion::config::TableOptions; +use datafusion::dataframe::DataFrame; +use datafusion::datasource::listing::{ + ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl, +}; +use datafusion::error::Result; +use datafusion::execution::config::SessionConfig; +use datafusion::execution::context::{DataFilePaths, SessionContext, SessionState}; +use datafusion::execution::options::ReadOptions; + +use async_trait::async_trait; + +use self::file_format::OrcFormat; + +mod file_format; +mod object_store_reader; +mod physical_exec; + +/// Configuration options for reading ORC files. +#[derive(Clone)] +pub struct OrcReadOptions<'a> { + pub file_extension: &'a str, +} + +impl<'a> Default for OrcReadOptions<'a> { + fn default() -> Self { + Self { + file_extension: "orc", + } + } +} + +#[async_trait] +impl ReadOptions<'_> for OrcReadOptions<'_> { + fn to_listing_options( + &self, + _config: &SessionConfig, + _table_options: TableOptions, + ) -> ListingOptions { + let file_format = OrcFormat::new(); + ListingOptions::new(Arc::new(file_format)).with_file_extension(self.file_extension) + } + + async fn get_resolved_schema( + &self, + config: &SessionConfig, + state: SessionState, + table_path: ListingTableUrl, + ) -> Result { + self._get_resolved_schema(config, state, table_path, None) + .await + } +} + +/// Exposes new functions for registering ORC tables onto a DataFusion [`SessionContext`] +/// to enable querying them using the SQL or DataFrame API. 
+pub trait SessionContextOrcExt { + fn read_orc( + &self, + table_paths: P, + options: OrcReadOptions<'_>, + ) -> impl std::future::Future> + Send; + + fn register_orc( + &self, + name: &str, + table_path: &str, + options: OrcReadOptions<'_>, + ) -> impl std::future::Future> + Send; +} + +impl SessionContextOrcExt for SessionContext { + async fn read_orc( + &self, + table_paths: P, + options: OrcReadOptions<'_>, + ) -> Result { + // SessionContext::_read_type + let table_paths = table_paths.to_urls()?; + let session_config = self.copied_config(); + let listing_options = + ListingOptions::new(Arc::new(OrcFormat::new())).with_file_extension(".orc"); + + let option_extension = listing_options.file_extension.clone(); + + if table_paths.is_empty() { + return exec_err!("No table paths were provided"); + } + + // check if the file extension matches the expected extension + for path in &table_paths { + let file_path = path.as_str(); + if !file_path.ends_with(option_extension.clone().as_str()) && !path.is_collection() { + return exec_err!( + "File path '{file_path}' does not match the expected extension '{option_extension}'" + ); + } + } + + let resolved_schema = options + .get_resolved_schema(&session_config, self.state(), table_paths[0].clone()) + .await?; + let config = ListingTableConfig::new_with_multi_paths(table_paths) + .with_listing_options(listing_options) + .with_schema(resolved_schema); + let provider = ListingTable::try_new(config)?; + self.read_table(Arc::new(provider)) + } + + async fn register_orc( + &self, + name: &str, + table_path: &str, + options: OrcReadOptions<'_>, + ) -> Result<()> { + let listing_options = + options.to_listing_options(&self.copied_config(), self.copied_table_options()); + self.register_listing_table(name, table_path, listing_options, None, None) + .await?; + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use datafusion::assert_batches_sorted_eq; + + use super::*; + + #[tokio::test] + async fn dataframe() -> Result<()> { + let ctx = SessionContext::new(); + ctx.register_orc( + "table1", + "tests/basic/data/alltypes.snappy.orc", + OrcReadOptions::default(), + ) + .await?; + + let actual = ctx + .sql("select int16, utf8 from table1 limit 5") + .await? + .collect() + .await?; + + assert_batches_sorted_eq!( + [ + "+-------+--------+", + "| int16 | utf8 |", + "+-------+--------+", + "| | |", + "| -1 | |", + "| 0 | |", + "| 1 | a |", + "| 32767 | encode |", + "+-------+--------+", + ], + &actual + ); + + Ok(()) + } +} diff --git a/src/memory.rs b/src/memory.rs deleted file mode 100644 index f5e5c92d..00000000 --- a/src/memory.rs +++ /dev/null @@ -1,23 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
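Besides register_orc (exercised in the test above), read_orc skips the registration step and yields a DataFrame directly. A minimal sketch against the same test file, assuming the import paths shown in the crate docs above:

use datafusion::error::Result;
use datafusion::prelude::*;
use orc_rust::datafusion::{OrcReadOptions, SessionContextOrcExt};

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();
    // No registration step: load the file straight into a DataFrame.
    let df = ctx
        .read_orc("tests/basic/data/alltypes.snappy.orc", OrcReadOptions::default())
        .await?;
    df.select_columns(&["int16", "utf8"])?.show().await?;
    Ok(())
}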
- -/// Estimating memory usage is important when writing files, as we finish -/// writing a stripe according to a set size threshold. -pub trait EstimateMemory { - /// Approximate current memory usage in bytes. - fn estimate_memory_size(&self) -> usize; -} diff --git a/src/datafusion/mod.rs b/src/mod.rs similarity index 99% rename from src/datafusion/mod.rs rename to src/mod.rs index 6c0e92f6..d6cead83 100644 --- a/src/datafusion/mod.rs +++ b/src/mod.rs @@ -58,7 +58,7 @@ use datafusion::execution::options::ReadOptions; use async_trait::async_trait; -use crate::error::OrcError; +use orc_rust::error::OrcError; use self::file_format::OrcFormat; diff --git a/src/datafusion/object_store_reader.rs b/src/object_store_reader.rs similarity index 97% rename from src/datafusion/object_store_reader.rs rename to src/object_store_reader.rs index 7a3e89bf..6ad84f7d 100644 --- a/src/datafusion/object_store_reader.rs +++ b/src/object_store_reader.rs @@ -17,10 +17,10 @@ use std::sync::Arc; -use crate::reader::AsyncChunkReader; use bytes::Bytes; use futures::future::BoxFuture; use futures::{FutureExt, TryFutureExt}; +use orc_rust::reader::AsyncChunkReader; use object_store::{ObjectMeta, ObjectStore}; diff --git a/src/datafusion/physical_exec.rs b/src/physical_exec.rs similarity index 97% rename from src/datafusion/physical_exec.rs rename to src/physical_exec.rs index df780cc2..2a3918fc 100644 --- a/src/datafusion/physical_exec.rs +++ b/src/physical_exec.rs @@ -19,8 +19,6 @@ use std::any::Any; use std::fmt::{self, Debug}; use std::sync::Arc; -use crate::projection::ProjectionMask; -use crate::ArrowReaderBuilder; use arrow::error::ArrowError; use datafusion::arrow::datatypes::SchemaRef; use datafusion::datasource::physical_plan::{ @@ -34,6 +32,8 @@ use datafusion::physical_plan::{ SendableRecordBatchStream, }; use datafusion_physical_expr::EquivalenceProperties; +use orc_rust::projection::ProjectionMask; +use orc_rust::ArrowReaderBuilder; use futures_util::StreamExt; use object_store::ObjectStore; @@ -76,6 +76,10 @@ impl ExecutionPlan for OrcExec { self } + fn name(&self) -> &str { + "OrcExec" + } + fn children(&self) -> Vec<&Arc> { vec![] } diff --git a/src/projection.rs b/src/projection.rs deleted file mode 100644 index 70815dbc..00000000 --- a/src/projection.rs +++ /dev/null @@ -1,78 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use crate::schema::RootDataType; - -// TODO: be able to nest project (project columns within struct type) - -/// Specifies which column indices to project from an ORC type. -#[derive(Debug, Clone)] -pub struct ProjectionMask { - /// Indices of column in ORC type, can refer to nested types - /// (not only root level columns) - indices: Option>, -} - -impl ProjectionMask { - /// Project all columns. 
diff --git a/src/datafusion/mod.rs b/src/mod.rs
similarity index 99%
rename from src/datafusion/mod.rs
rename to src/mod.rs
index 6c0e92f6..d6cead83 100644
--- a/src/datafusion/mod.rs
+++ b/src/mod.rs
@@ -58,7 +58,7 @@ use datafusion::execution::options::ReadOptions;
 
 use async_trait::async_trait;
 
-use crate::error::OrcError;
+use orc_rust::error::OrcError;
 
 use self::file_format::OrcFormat;
diff --git a/src/datafusion/object_store_reader.rs b/src/object_store_reader.rs
similarity index 97%
rename from src/datafusion/object_store_reader.rs
rename to src/object_store_reader.rs
index 7a3e89bf..6ad84f7d 100644
--- a/src/datafusion/object_store_reader.rs
+++ b/src/object_store_reader.rs
@@ -17,10 +17,10 @@
 
 use std::sync::Arc;
 
-use crate::reader::AsyncChunkReader;
 use bytes::Bytes;
 use futures::future::BoxFuture;
 use futures::{FutureExt, TryFutureExt};
+use orc_rust::reader::AsyncChunkReader;
 use object_store::{ObjectMeta, ObjectStore};
diff --git a/src/datafusion/physical_exec.rs b/src/physical_exec.rs
similarity index 97%
rename from src/datafusion/physical_exec.rs
rename to src/physical_exec.rs
index df780cc2..2a3918fc 100644
--- a/src/datafusion/physical_exec.rs
+++ b/src/physical_exec.rs
@@ -19,8 +19,6 @@ use std::any::Any;
 use std::fmt::{self, Debug};
 use std::sync::Arc;
 
-use crate::projection::ProjectionMask;
-use crate::ArrowReaderBuilder;
 use arrow::error::ArrowError;
 use datafusion::arrow::datatypes::SchemaRef;
 use datafusion::datasource::physical_plan::{
@@ -34,6 +32,8 @@ use datafusion::physical_plan::{
     SendableRecordBatchStream,
 };
 use datafusion_physical_expr::EquivalenceProperties;
+use orc_rust::projection::ProjectionMask;
+use orc_rust::ArrowReaderBuilder;
 use futures_util::StreamExt;
 use object_store::ObjectStore;
@@ -76,6 +76,10 @@ impl ExecutionPlan for OrcExec {
         self
     }
 
+    fn name(&self) -> &str {
+        "OrcExec"
+    }
+
     fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
         vec![]
     }
diff --git a/src/projection.rs b/src/projection.rs
deleted file mode 100644
index 70815dbc..00000000
--- a/src/projection.rs
+++ /dev/null
@@ -1,78 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::schema::RootDataType;
-
-// TODO: be able to nest project (project columns within struct type)
-
-/// Specifies which column indices to project from an ORC type.
-#[derive(Debug, Clone)]
-pub struct ProjectionMask {
-    /// Indices of column in ORC type, can refer to nested types
-    /// (not only root level columns)
-    indices: Option<Vec<usize>>,
-}
-
-impl ProjectionMask {
-    /// Project all columns.
-    pub fn all() -> Self {
-        Self { indices: None }
-    }
-
-    /// Project only specific columns from the root type by column index.
-    pub fn roots(root_data_type: &RootDataType, indices: impl IntoIterator<Item = usize>) -> Self {
-        // TODO: return error if column index not found?
-        let input_indices = indices.into_iter().collect::<Vec<usize>>();
-        // By default always project root
-        let mut indices = vec![0];
-        root_data_type
-            .children()
-            .iter()
-            .filter(|col| input_indices.contains(&col.data_type().column_index()))
-            .for_each(|col| indices.extend(col.data_type().all_indices()));
-        Self {
-            indices: Some(indices),
-        }
-    }
-
-    /// Project only specific columns from the root type by column name.
-    pub fn named_roots<T>(root_data_type: &RootDataType, names: &[T]) -> Self
-    where
-        T: AsRef<str>,
-    {
-        // TODO: return error if column name not found?
-        // By default always project root
-        let mut indices = vec![0];
-        let names = names.iter().map(AsRef::as_ref).collect::<Vec<&str>>();
-        root_data_type
-            .children()
-            .iter()
-            .filter(|col| names.contains(&col.name()))
-            .for_each(|col| indices.extend(col.data_type().all_indices()));
-        Self {
-            indices: Some(indices),
-        }
-    }
-
-    /// Check whether an ORC column is projected, by index.
-    pub fn is_index_projected(&self, index: usize) -> bool {
-        match &self.indices {
-            Some(indices) => indices.contains(&index),
-            None => true,
-        }
-    }
-}
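A brief usage sketch of the mask that just moved upstream. The import paths assume `orc-rust` 0.5 exposes `projection` and `schema` publicly (the new `orc_rust::projection::ProjectionMask` import above confirms the former); the column names echo the test fixture:

```rust
use orc_rust::projection::ProjectionMask;
use orc_rust::schema::RootDataType;

/// Build a mask that keeps only the `int16` and `utf8` root columns.
fn narrow_mask(root: &RootDataType) -> ProjectionMask {
    let mask = ProjectionMask::named_roots(root, &["int16", "utf8"]);
    // Index 0 is the implicit root struct, which is always projected.
    assert!(mask.is_index_projected(0));
    mask
}
```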
diff --git a/src/proto.rs b/src/proto.rs
deleted file mode 100644
index ae71cdbd..00000000
--- a/src/proto.rs
+++ /dev/null
@@ -1,829 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// This file was automatically generated through the regen.sh script, and should not be edited.
-
-#[allow(clippy::derive_partial_eq_without_eq)]
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct IntegerStatistics {
-    #[prost(sint64, optional, tag = "1")]
-    pub minimum: ::core::option::Option<i64>,
-    #[prost(sint64, optional, tag = "2")]
-    pub maximum: ::core::option::Option<i64>,
-    #[prost(sint64, optional, tag = "3")]
-    pub sum: ::core::option::Option<i64>,
-}
-#[allow(clippy::derive_partial_eq_without_eq)]
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct DoubleStatistics {
-    #[prost(double, optional, tag = "1")]
-    pub minimum: ::core::option::Option<f64>,
-    #[prost(double, optional, tag = "2")]
-    pub maximum: ::core::option::Option<f64>,
-    #[prost(double, optional, tag = "3")]
-    pub sum: ::core::option::Option<f64>,
-}
-#[allow(clippy::derive_partial_eq_without_eq)]
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct StringStatistics {
-    #[prost(string, optional, tag = "1")]
-    pub minimum: ::core::option::Option<::prost::alloc::string::String>,
-    #[prost(string, optional, tag = "2")]
-    pub maximum: ::core::option::Option<::prost::alloc::string::String>,
-    /// sum will store the total length of all strings in a stripe
-    #[prost(sint64, optional, tag = "3")]
-    pub sum: ::core::option::Option<i64>,
-    /// If the minimum or maximum value was longer than 1024 bytes, store a lower or upper
-    /// bound instead of the minimum or maximum values above.
-    #[prost(string, optional, tag = "4")]
-    pub lower_bound: ::core::option::Option<::prost::alloc::string::String>,
-    #[prost(string, optional, tag = "5")]
-    pub upper_bound: ::core::option::Option<::prost::alloc::string::String>,
-}
-#[allow(clippy::derive_partial_eq_without_eq)]
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct BucketStatistics {
-    #[prost(uint64, repeated, tag = "1")]
-    pub count: ::prost::alloc::vec::Vec<u64>,
-}
-#[allow(clippy::derive_partial_eq_without_eq)]
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct DecimalStatistics {
-    #[prost(string, optional, tag = "1")]
-    pub minimum: ::core::option::Option<::prost::alloc::string::String>,
-    #[prost(string, optional, tag = "2")]
-    pub maximum: ::core::option::Option<::prost::alloc::string::String>,
-    #[prost(string, optional, tag = "3")]
-    pub sum: ::core::option::Option<::prost::alloc::string::String>,
-}
-#[allow(clippy::derive_partial_eq_without_eq)]
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct DateStatistics {
-    /// min,max values saved as days since epoch
-    #[prost(sint32, optional, tag = "1")]
-    pub minimum: ::core::option::Option<i32>,
-    #[prost(sint32, optional, tag = "2")]
-    pub maximum: ::core::option::Option<i32>,
-}
-#[allow(clippy::derive_partial_eq_without_eq)]
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct TimestampStatistics {
-    /// min,max values saved as milliseconds since epoch
-    #[prost(sint64, optional, tag = "1")]
-    pub minimum: ::core::option::Option<i64>,
-    #[prost(sint64, optional, tag = "2")]
-    pub maximum: ::core::option::Option<i64>,
-    #[prost(sint64, optional, tag = "3")]
-    pub minimum_utc: ::core::option::Option<i64>,
-    #[prost(sint64, optional, tag = "4")]
-    pub maximum_utc: ::core::option::Option<i64>,
-    /// store the lower 6 TS digits for min/max to achieve nanosecond precision
-    #[prost(int32, optional, tag = "5")]
-    pub minimum_nanos: ::core::option::Option<i32>,
-    #[prost(int32, optional, tag = "6")]
-    pub maximum_nanos: ::core::option::Option<i32>,
-}
-#[allow(clippy::derive_partial_eq_without_eq)]
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct BinaryStatistics {
-    /// sum will store the total binary blob length in a stripe
-    #[prost(sint64, optional, tag = "1")]
-    pub sum: ::core::option::Option<i64>,
-}
-/// Statistics for list and map
-#[allow(clippy::derive_partial_eq_without_eq)]
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct CollectionStatistics {
-    #[prost(uint64, optional, tag = "1")]
-    pub min_children: ::core::option::Option<u64>,
-    #[prost(uint64, optional, tag = "2")]
-    pub max_children: ::core::option::Option<u64>,
-    #[prost(uint64, optional, tag = "3")]
-    pub total_children: ::core::option::Option<u64>,
-}
-#[allow(clippy::derive_partial_eq_without_eq)]
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct ColumnStatistics {
-    #[prost(uint64, optional, tag = "1")]
-    pub number_of_values: ::core::option::Option<u64>,
-    #[prost(message, optional, tag = "2")]
-    pub int_statistics: ::core::option::Option<IntegerStatistics>,
-    #[prost(message, optional, tag = "3")]
-    pub double_statistics: ::core::option::Option<DoubleStatistics>,
-    #[prost(message, optional, tag = "4")]
-    pub string_statistics: ::core::option::Option<StringStatistics>,
-    #[prost(message, optional, tag = "5")]
-    pub bucket_statistics: ::core::option::Option<BucketStatistics>,
-    #[prost(message, optional, tag = "6")]
-    pub decimal_statistics: ::core::option::Option<DecimalStatistics>,
-    #[prost(message, optional, tag = "7")]
-    pub date_statistics: ::core::option::Option<DateStatistics>,
-    #[prost(message, optional, tag = "8")]
-    pub binary_statistics: ::core::option::Option<BinaryStatistics>,
-    #[prost(message, optional, tag = "9")]
-    pub timestamp_statistics: ::core::option::Option<TimestampStatistics>,
-    #[prost(bool, optional, tag = "10")]
-    pub has_null: ::core::option::Option<bool>,
-    #[prost(uint64, optional, tag = "11")]
-    pub bytes_on_disk: ::core::option::Option<u64>,
-    #[prost(message, optional, tag = "12")]
-    pub collection_statistics: ::core::option::Option<CollectionStatistics>,
-}
-#[allow(clippy::derive_partial_eq_without_eq)]
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct RowIndexEntry {
-    #[prost(uint64, repeated, tag = "1")]
-    pub positions: ::prost::alloc::vec::Vec<u64>,
-    #[prost(message, optional, tag = "2")]
-    pub statistics: ::core::option::Option<ColumnStatistics>,
-}
-#[allow(clippy::derive_partial_eq_without_eq)]
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct RowIndex {
-    #[prost(message, repeated, tag = "1")]
-    pub entry: ::prost::alloc::vec::Vec<RowIndexEntry>,
-}
-#[allow(clippy::derive_partial_eq_without_eq)]
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct BloomFilter {
-    #[prost(uint32, optional, tag = "1")]
-    pub num_hash_functions: ::core::option::Option<u32>,
-    #[prost(fixed64, repeated, packed = "false", tag = "2")]
-    pub bitset: ::prost::alloc::vec::Vec<u64>,
-    #[prost(bytes = "vec", optional, tag = "3")]
-    pub utf8bitset: ::core::option::Option<::prost::alloc::vec::Vec<u8>>,
-}
-#[allow(clippy::derive_partial_eq_without_eq)]
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct BloomFilterIndex {
-    #[prost(message, repeated, tag = "1")]
-    pub bloom_filter: ::prost::alloc::vec::Vec<BloomFilter>,
-}
-#[allow(clippy::derive_partial_eq_without_eq)]
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct Stream {
-    #[prost(enumeration = "stream::Kind", optional, tag = "1")]
-    pub kind: ::core::option::Option<i32>,
-    #[prost(uint32, optional, tag = "2")]
-    pub column: ::core::option::Option<u32>,
-    #[prost(uint64, optional, tag = "3")]
-    pub length: ::core::option::Option<u64>,
-}
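To show how these generated messages are consumed, a hypothetical helper that decodes a serialized `ColumnStatistics` (the buffer is assumed already decompressed) and pulls out the integer min/max — the kind of access the crate's rudimentary statistics support builds on:

```rust
use prost::Message;

/// Hypothetical helper: decode a serialized, already-decompressed
/// `ColumnStatistics` buffer and extract the integer min/max, if present.
fn int_min_max(buf: &[u8]) -> Option<(i64, i64)> {
    let stats = ColumnStatistics::decode(buf).ok()?;
    let ints = stats.int_statistics?;
    Some((ints.minimum?, ints.maximum?))
}
```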
-/// Nested message and enum types in `Stream`.
-pub mod stream {
-    /// if you add new index stream kinds, you need to make sure to update
-    /// StreamName to ensure it is added to the stripe in the right area
-    #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)]
-    #[repr(i32)]
-    pub enum Kind {
-        Present = 0,
-        Data = 1,
-        Length = 2,
-        DictionaryData = 3,
-        DictionaryCount = 4,
-        Secondary = 5,
-        RowIndex = 6,
-        BloomFilter = 7,
-        BloomFilterUtf8 = 8,
-        /// Virtual stream kinds to allocate space for encrypted index and data.
-        EncryptedIndex = 9,
-        EncryptedData = 10,
-        /// stripe statistics streams
-        StripeStatistics = 100,
-        /// A virtual stream kind that is used for setting the encryption IV.
-        FileStatistics = 101,
-    }
-    impl Kind {
-        /// String value of the enum field names used in the ProtoBuf definition.
-        ///
-        /// The values are not transformed in any way and thus are considered stable
-        /// (if the ProtoBuf definition does not change) and safe for programmatic use.
-        pub fn as_str_name(&self) -> &'static str {
-            match self {
-                Kind::Present => "PRESENT",
-                Kind::Data => "DATA",
-                Kind::Length => "LENGTH",
-                Kind::DictionaryData => "DICTIONARY_DATA",
-                Kind::DictionaryCount => "DICTIONARY_COUNT",
-                Kind::Secondary => "SECONDARY",
-                Kind::RowIndex => "ROW_INDEX",
-                Kind::BloomFilter => "BLOOM_FILTER",
-                Kind::BloomFilterUtf8 => "BLOOM_FILTER_UTF8",
-                Kind::EncryptedIndex => "ENCRYPTED_INDEX",
-                Kind::EncryptedData => "ENCRYPTED_DATA",
-                Kind::StripeStatistics => "STRIPE_STATISTICS",
-                Kind::FileStatistics => "FILE_STATISTICS",
-            }
-        }
-        /// Creates an enum from field names used in the ProtoBuf definition.
-        pub fn from_str_name(value: &str) -> ::core::option::Option<Self> {
-            match value {
-                "PRESENT" => Some(Self::Present),
-                "DATA" => Some(Self::Data),
-                "LENGTH" => Some(Self::Length),
-                "DICTIONARY_DATA" => Some(Self::DictionaryData),
-                "DICTIONARY_COUNT" => Some(Self::DictionaryCount),
-                "SECONDARY" => Some(Self::Secondary),
-                "ROW_INDEX" => Some(Self::RowIndex),
-                "BLOOM_FILTER" => Some(Self::BloomFilter),
-                "BLOOM_FILTER_UTF8" => Some(Self::BloomFilterUtf8),
-                "ENCRYPTED_INDEX" => Some(Self::EncryptedIndex),
-                "ENCRYPTED_DATA" => Some(Self::EncryptedData),
-                "STRIPE_STATISTICS" => Some(Self::StripeStatistics),
-                "FILE_STATISTICS" => Some(Self::FileStatistics),
-                _ => None,
-            }
-        }
-    }
-}
-#[allow(clippy::derive_partial_eq_without_eq)]
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct ColumnEncoding {
-    #[prost(enumeration = "column_encoding::Kind", optional, tag = "1")]
-    pub kind: ::core::option::Option<i32>,
-    #[prost(uint32, optional, tag = "2")]
-    pub dictionary_size: ::core::option::Option<u32>,
-    /// The encoding of the bloom filters for this column:
-    /// 0 or missing = none or original
-    /// 1 = ORC-135 (utc for timestamps)
-    #[prost(uint32, optional, tag = "3")]
-    pub bloom_encoding: ::core::option::Option<u32>,
-}
-/// Nested message and enum types in `ColumnEncoding`.
-pub mod column_encoding {
-    #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)]
-    #[repr(i32)]
-    pub enum Kind {
-        Direct = 0,
-        Dictionary = 1,
-        DirectV2 = 2,
-        DictionaryV2 = 3,
-    }
-    impl Kind {
-        /// String value of the enum field names used in the ProtoBuf definition.
-        ///
-        /// The values are not transformed in any way and thus are considered stable
-        /// (if the ProtoBuf definition does not change) and safe for programmatic use.
-        pub fn as_str_name(&self) -> &'static str {
-            match self {
-                Kind::Direct => "DIRECT",
-                Kind::Dictionary => "DICTIONARY",
-                Kind::DirectV2 => "DIRECT_V2",
-                Kind::DictionaryV2 => "DICTIONARY_V2",
-            }
-        }
-        /// Creates an enum from field names used in the ProtoBuf definition.
-        pub fn from_str_name(value: &str) -> ::core::option::Option<Self> {
-            match value {
-                "DIRECT" => Some(Self::Direct),
-                "DICTIONARY" => Some(Self::Dictionary),
-                "DIRECT_V2" => Some(Self::DirectV2),
-                "DICTIONARY_V2" => Some(Self::DictionaryV2),
-                _ => None,
-            }
-        }
-    }
-}
-#[allow(clippy::derive_partial_eq_without_eq)]
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct StripeEncryptionVariant {
-    #[prost(message, repeated, tag = "1")]
-    pub streams: ::prost::alloc::vec::Vec<Stream>,
-    #[prost(message, repeated, tag = "2")]
-    pub encoding: ::prost::alloc::vec::Vec<ColumnEncoding>,
-}
-// each stripe looks like:
-//   index streams
-//     unencrypted
-//     variant 1..N
-//   data streams
-//     unencrypted
-//     variant 1..N
-//   footer
-
-#[allow(clippy::derive_partial_eq_without_eq)]
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct StripeFooter {
-    #[prost(message, repeated, tag = "1")]
-    pub streams: ::prost::alloc::vec::Vec<Stream>,
-    #[prost(message, repeated, tag = "2")]
-    pub columns: ::prost::alloc::vec::Vec<ColumnEncoding>,
-    #[prost(string, optional, tag = "3")]
-    pub writer_timezone: ::core::option::Option<::prost::alloc::string::String>,
-    /// one for each column encryption variant
-    #[prost(message, repeated, tag = "4")]
-    pub encryption: ::prost::alloc::vec::Vec<StripeEncryptionVariant>,
-}
-// the file tail looks like:
-//   encrypted stripe statistics: ColumnarStripeStatistics (order by variant)
-//   stripe statistics: Metadata
-//   footer: Footer
-//   postscript: PostScript
-//   psLen: byte
-
-#[allow(clippy::derive_partial_eq_without_eq)]
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct StringPair {
-    #[prost(string, optional, tag = "1")]
-    pub key: ::core::option::Option<::prost::alloc::string::String>,
-    #[prost(string, optional, tag = "2")]
-    pub value: ::core::option::Option<::prost::alloc::string::String>,
-}
-#[allow(clippy::derive_partial_eq_without_eq)]
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct Type {
-    #[prost(enumeration = "r#type::Kind", optional, tag = "1")]
-    pub kind: ::core::option::Option<i32>,
-    #[prost(uint32, repeated, tag = "2")]
-    pub subtypes: ::prost::alloc::vec::Vec<u32>,
-    #[prost(string, repeated, tag = "3")]
-    pub field_names: ::prost::alloc::vec::Vec<::prost::alloc::string::String>,
-    #[prost(uint32, optional, tag = "4")]
-    pub maximum_length: ::core::option::Option<u32>,
-    #[prost(uint32, optional, tag = "5")]
-    pub precision: ::core::option::Option<u32>,
-    #[prost(uint32, optional, tag = "6")]
-    pub scale: ::core::option::Option<u32>,
-    #[prost(message, repeated, tag = "7")]
-    pub attributes: ::prost::alloc::vec::Vec<StringPair>,
-}
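For orientation, a sketch of how the flattened `types` list encodes the schema tree: column ids are preorder positions, with the root struct always at id 0. The values below are illustrative only (`prost::Message` derives `Default`, so unused fields can be elided):

```rust
/// Illustrative only: the `types` list for `struct<a:int,b:string>`.
fn example_types() -> Vec<Type> {
    let root = Type {
        kind: Some(r#type::Kind::Struct as i32),
        subtypes: vec![1, 2], // preorder ids of the two children
        field_names: vec!["a".into(), "b".into()],
        ..Default::default()
    };
    let a = Type {
        kind: Some(r#type::Kind::Int as i32),
        ..Default::default()
    };
    let b = Type {
        kind: Some(r#type::Kind::String as i32),
        ..Default::default()
    };
    vec![root, a, b] // stored flattened in Footer.types
}
```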
-/// Nested message and enum types in `Type`.
-pub mod r#type {
-    #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)]
-    #[repr(i32)]
-    pub enum Kind {
-        Boolean = 0,
-        Byte = 1,
-        Short = 2,
-        Int = 3,
-        Long = 4,
-        Float = 5,
-        Double = 6,
-        String = 7,
-        Binary = 8,
-        Timestamp = 9,
-        List = 10,
-        Map = 11,
-        Struct = 12,
-        Union = 13,
-        Decimal = 14,
-        Date = 15,
-        Varchar = 16,
-        Char = 17,
-        TimestampInstant = 18,
-    }
-    impl Kind {
-        /// String value of the enum field names used in the ProtoBuf definition.
-        ///
-        /// The values are not transformed in any way and thus are considered stable
-        /// (if the ProtoBuf definition does not change) and safe for programmatic use.
-        pub fn as_str_name(&self) -> &'static str {
-            match self {
-                Kind::Boolean => "BOOLEAN",
-                Kind::Byte => "BYTE",
-                Kind::Short => "SHORT",
-                Kind::Int => "INT",
-                Kind::Long => "LONG",
-                Kind::Float => "FLOAT",
-                Kind::Double => "DOUBLE",
-                Kind::String => "STRING",
-                Kind::Binary => "BINARY",
-                Kind::Timestamp => "TIMESTAMP",
-                Kind::List => "LIST",
-                Kind::Map => "MAP",
-                Kind::Struct => "STRUCT",
-                Kind::Union => "UNION",
-                Kind::Decimal => "DECIMAL",
-                Kind::Date => "DATE",
-                Kind::Varchar => "VARCHAR",
-                Kind::Char => "CHAR",
-                Kind::TimestampInstant => "TIMESTAMP_INSTANT",
-            }
-        }
-        /// Creates an enum from field names used in the ProtoBuf definition.
-        pub fn from_str_name(value: &str) -> ::core::option::Option<Self> {
-            match value {
-                "BOOLEAN" => Some(Self::Boolean),
-                "BYTE" => Some(Self::Byte),
-                "SHORT" => Some(Self::Short),
-                "INT" => Some(Self::Int),
-                "LONG" => Some(Self::Long),
-                "FLOAT" => Some(Self::Float),
-                "DOUBLE" => Some(Self::Double),
-                "STRING" => Some(Self::String),
-                "BINARY" => Some(Self::Binary),
-                "TIMESTAMP" => Some(Self::Timestamp),
-                "LIST" => Some(Self::List),
-                "MAP" => Some(Self::Map),
-                "STRUCT" => Some(Self::Struct),
-                "UNION" => Some(Self::Union),
-                "DECIMAL" => Some(Self::Decimal),
-                "DATE" => Some(Self::Date),
-                "VARCHAR" => Some(Self::Varchar),
-                "CHAR" => Some(Self::Char),
-                "TIMESTAMP_INSTANT" => Some(Self::TimestampInstant),
-                _ => None,
-            }
-        }
-    }
-}
-#[allow(clippy::derive_partial_eq_without_eq)]
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct StripeInformation {
-    /// the global file offset of the start of the stripe
-    #[prost(uint64, optional, tag = "1")]
-    pub offset: ::core::option::Option<u64>,
-    /// the number of bytes of index
-    #[prost(uint64, optional, tag = "2")]
-    pub index_length: ::core::option::Option<u64>,
-    /// the number of bytes of data
-    #[prost(uint64, optional, tag = "3")]
-    pub data_length: ::core::option::Option<u64>,
-    /// the number of bytes in the stripe footer
-    #[prost(uint64, optional, tag = "4")]
-    pub footer_length: ::core::option::Option<u64>,
-    /// the number of rows in this stripe
-    #[prost(uint64, optional, tag = "5")]
-    pub number_of_rows: ::core::option::Option<u64>,
-    /// If this is present, the reader should use this value for the encryption
-    /// stripe id for setting the encryption IV. Otherwise, the reader should
-    /// use one larger than the previous stripe's encryptStripeId.
-    /// For unmerged ORC files, the first stripe will use 1 and the rest of the
-    /// stripes won't have it set. For merged files, the stripe information
-    /// will be copied from their original files and thus the first stripe of
-    /// each of the input files will reset it to 1.
-    /// Note that 1 was chosen, because protobuf v3 doesn't serialize
-    /// primitive types that are the default (eg. 0).
-    #[prost(uint64, optional, tag = "6")]
-    pub encrypt_stripe_id: ::core::option::Option<u64>,
-    /// For each encryption variant, the new encrypted local key to use
-    /// until we find a replacement.
-    #[prost(bytes = "vec", repeated, tag = "7")]
-    pub encrypted_local_keys: ::prost::alloc::vec::Vec<::prost::alloc::vec::Vec<u8>>,
-}
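The three length fields above, together with the stripe layout comment earlier (index streams, then data streams, then the stripe footer), are enough to derive each section's byte range. A minimal sketch, treating missing fields as zero:

```rust
use std::ops::Range;

/// Sketch: byte ranges of a stripe's index, data, and footer sections,
/// derived from `StripeInformation` per the layout described above.
fn stripe_sections(info: &StripeInformation) -> (Range<u64>, Range<u64>, Range<u64>) {
    let offset = info.offset.unwrap_or(0);
    let index_end = offset + info.index_length.unwrap_or(0);
    let data_end = index_end + info.data_length.unwrap_or(0);
    let footer_end = data_end + info.footer_length.unwrap_or(0);
    (offset..index_end, index_end..data_end, data_end..footer_end)
}
```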
-#[allow(clippy::derive_partial_eq_without_eq)]
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct UserMetadataItem {
-    #[prost(string, optional, tag = "1")]
-    pub name: ::core::option::Option<::prost::alloc::string::String>,
-    #[prost(bytes = "vec", optional, tag = "2")]
-    pub value: ::core::option::Option<::prost::alloc::vec::Vec<u8>>,
-}
-/// StripeStatistics (1 per a stripe), which each contain the
-/// ColumnStatistics for each column.
-/// This message type is only used in ORC v0 and v1.
-#[allow(clippy::derive_partial_eq_without_eq)]
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct StripeStatistics {
-    #[prost(message, repeated, tag = "1")]
-    pub col_stats: ::prost::alloc::vec::Vec<ColumnStatistics>,
-}
-/// This message type is only used in ORC v0 and v1.
-#[allow(clippy::derive_partial_eq_without_eq)]
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct Metadata {
-    #[prost(message, repeated, tag = "1")]
-    pub stripe_stats: ::prost::alloc::vec::Vec<StripeStatistics>,
-}
-/// In ORC v2 (and for encrypted columns in v1), each column has
-/// their column statistics written separately.
-#[allow(clippy::derive_partial_eq_without_eq)]
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct ColumnarStripeStatistics {
-    /// one value for each stripe in the file
-    #[prost(message, repeated, tag = "1")]
-    pub col_stats: ::prost::alloc::vec::Vec<ColumnStatistics>,
-}
-#[allow(clippy::derive_partial_eq_without_eq)]
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct FileStatistics {
-    #[prost(message, repeated, tag = "1")]
-    pub column: ::prost::alloc::vec::Vec<ColumnStatistics>,
-}
-/// How was the data masked? This isn't necessary for reading the file, but
-/// is documentation about how the file was written.
-#[allow(clippy::derive_partial_eq_without_eq)]
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct DataMask {
-    /// the kind of masking, which may include third party masks
-    #[prost(string, optional, tag = "1")]
-    pub name: ::core::option::Option<::prost::alloc::string::String>,
-    /// parameters for the mask
-    #[prost(string, repeated, tag = "2")]
-    pub mask_parameters: ::prost::alloc::vec::Vec<::prost::alloc::string::String>,
-    /// the unencrypted column roots this mask was applied to
-    #[prost(uint32, repeated, tag = "3")]
-    pub columns: ::prost::alloc::vec::Vec<u32>,
-}
-/// Information about the encryption keys.
-#[allow(clippy::derive_partial_eq_without_eq)]
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct EncryptionKey {
-    #[prost(string, optional, tag = "1")]
-    pub key_name: ::core::option::Option<::prost::alloc::string::String>,
-    #[prost(uint32, optional, tag = "2")]
-    pub key_version: ::core::option::Option<u32>,
-    #[prost(enumeration = "EncryptionAlgorithm", optional, tag = "3")]
-    pub algorithm: ::core::option::Option<i32>,
-}
-/// The description of an encryption variant.
-/// Each variant is a single subtype that is encrypted with a single key.
-#[allow(clippy::derive_partial_eq_without_eq)]
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct EncryptionVariant {
-    /// the column id of the root
-    #[prost(uint32, optional, tag = "1")]
-    pub root: ::core::option::Option<u32>,
-    /// The master key that was used to encrypt the local key, referenced as
-    /// an index into the Encryption.key list.
-    #[prost(uint32, optional, tag = "2")]
-    pub key: ::core::option::Option<u32>,
-    /// the encrypted key for the file footer
-    #[prost(bytes = "vec", optional, tag = "3")]
-    pub encrypted_key: ::core::option::Option<::prost::alloc::vec::Vec<u8>>,
-    /// the stripe statistics for this variant
-    #[prost(message, repeated, tag = "4")]
-    pub stripe_statistics: ::prost::alloc::vec::Vec<Stream>,
-    /// encrypted file statistics as a FileStatistics
-    #[prost(bytes = "vec", optional, tag = "5")]
-    pub file_statistics: ::core::option::Option<::prost::alloc::vec::Vec<u8>>,
-}
-#[allow(clippy::derive_partial_eq_without_eq)]
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct Encryption {
-    /// all of the masks used in this file
-    #[prost(message, repeated, tag = "1")]
-    pub mask: ::prost::alloc::vec::Vec<DataMask>,
-    /// all of the keys used in this file
-    #[prost(message, repeated, tag = "2")]
-    pub key: ::prost::alloc::vec::Vec<EncryptionKey>,
-    /// The encrypted variants.
-    /// Readers should prefer the first variant that the user has access to
-    /// the corresponding key. If they don't have access to any of the keys,
-    /// they should get the unencrypted masked data.
-    #[prost(message, repeated, tag = "3")]
-    pub variants: ::prost::alloc::vec::Vec<EncryptionVariant>,
-    /// How are the local keys encrypted?
-    #[prost(enumeration = "KeyProviderKind", optional, tag = "4")]
-    pub key_provider: ::core::option::Option<i32>,
-}
-#[allow(clippy::derive_partial_eq_without_eq)]
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct Footer {
-    #[prost(uint64, optional, tag = "1")]
-    pub header_length: ::core::option::Option<u64>,
-    #[prost(uint64, optional, tag = "2")]
-    pub content_length: ::core::option::Option<u64>,
-    #[prost(message, repeated, tag = "3")]
-    pub stripes: ::prost::alloc::vec::Vec<StripeInformation>,
-    #[prost(message, repeated, tag = "4")]
-    pub types: ::prost::alloc::vec::Vec<Type>,
-    #[prost(message, repeated, tag = "5")]
-    pub metadata: ::prost::alloc::vec::Vec<UserMetadataItem>,
-    #[prost(uint64, optional, tag = "6")]
-    pub number_of_rows: ::core::option::Option<u64>,
-    #[prost(message, repeated, tag = "7")]
-    pub statistics: ::prost::alloc::vec::Vec<ColumnStatistics>,
-    #[prost(uint32, optional, tag = "8")]
-    pub row_index_stride: ::core::option::Option<u32>,
-    /// Each implementation that writes ORC files should register for a code
-    /// 0 = ORC Java
-    /// 1 = ORC C++
-    /// 2 = Presto
-    /// 3 = Scritchley Go from <https://github.com/scritchley/orc>
-    /// 4 = Trino
-    #[prost(uint32, optional, tag = "9")]
-    pub writer: ::core::option::Option<u32>,
-    /// information about the encryption in this file
-    #[prost(message, optional, tag = "10")]
-    pub encryption: ::core::option::Option<Encryption>,
-    #[prost(enumeration = "CalendarKind", optional, tag = "11")]
-    pub calendar: ::core::option::Option<i32>,
-    /// informative description about the version of the software that wrote
-    /// the file. It is assumed to be within a given writer, so for example
-    /// ORC 1.7.2 = "1.7.2". It may include suffixes, such as "-SNAPSHOT".
-    #[prost(string, optional, tag = "12")]
-    pub software_version: ::core::option::Option<::prost::alloc::string::String>,
-}
-/// Serialized length must be less than 255 bytes
-#[allow(clippy::derive_partial_eq_without_eq)]
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct PostScript {
-    #[prost(uint64, optional, tag = "1")]
-    pub footer_length: ::core::option::Option<u64>,
-    #[prost(enumeration = "CompressionKind", optional, tag = "2")]
-    pub compression: ::core::option::Option<i32>,
-    #[prost(uint64, optional, tag = "3")]
-    pub compression_block_size: ::core::option::Option<u64>,
-    /// the version of the file format
-    ///   \[0, 11\] = Hive 0.11
-    ///   \[0, 12\] = Hive 0.12
-    #[prost(uint32, repeated, tag = "4")]
-    pub version: ::prost::alloc::vec::Vec<u32>,
-    #[prost(uint64, optional, tag = "5")]
-    pub metadata_length: ::core::option::Option<u64>,
-    /// The version of the writer that wrote the file. This number is
-    /// updated when we make fixes or large changes to the writer so that
-    /// readers can detect whether a given bug is present in the data.
-    ///
-    /// Only the Java ORC writer may use values under 6 (or missing) so that
-    /// readers that predate ORC-202 treat the new writers correctly. Each
-    /// writer should assign their own sequence of versions starting from 6.
-    ///
-    /// Version of the ORC Java writer:
-    ///   0 = original
-    ///   1 = HIVE-8732 fixed (fixed stripe/file maximum statistics &
-    ///       string statistics use utf8 for min/max)
-    ///   2 = HIVE-4243 fixed (use real column names from Hive tables)
-    ///   3 = HIVE-12055 added (vectorized writer implementation)
-    ///   4 = HIVE-13083 fixed (decimals write present stream correctly)
-    ///   5 = ORC-101 fixed (bloom filters use utf8 consistently)
-    ///   6 = ORC-135 fixed (timestamp statistics use utc)
-    ///   7 = ORC-517 fixed (decimal64 min/max incorrect)
-    ///   8 = ORC-203 added (trim very long string statistics)
-    ///   9 = ORC-14 added (column encryption)
-    ///
-    /// Version of the ORC C++ writer:
-    ///   6 = original
-    ///
-    /// Version of the Presto writer:
-    ///   6 = original
-    ///
-    /// Version of the Scritchley Go writer:
-    ///   6 = original
-    ///
-    /// Version of the Trino writer:
-    ///   6 = original
-    ///
-    #[prost(uint32, optional, tag = "6")]
-    pub writer_version: ::core::option::Option<u32>,
-    /// the number of bytes in the encrypted stripe statistics
-    #[prost(uint64, optional, tag = "7")]
-    pub stripe_statistics_length: ::core::option::Option<u64>,
-    /// Leave this last in the record
-    #[prost(string, optional, tag = "8000")]
-    pub magic: ::core::option::Option<::prost::alloc::string::String>,
-}
-/// The contents of the file tail that must be serialized.
-/// This gets serialized as part of OrcSplit, also used by footer cache.
-#[allow(clippy::derive_partial_eq_without_eq)]
-#[derive(Clone, PartialEq, ::prost::Message)]
-pub struct FileTail {
-    #[prost(message, optional, tag = "1")]
-    pub postscript: ::core::option::Option<PostScript>,
-    #[prost(message, optional, tag = "2")]
-    pub footer: ::core::option::Option<Footer>,
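The `PostScript` above is the entry point for reading any ORC file. As a closing sketch of the tail layout described in the comments earlier (the PostScript sits last, followed by a single psLen byte), assuming the whole file is already in memory:

```rust
use prost::Message;

/// Minimal sketch: locate and decode the PostScript from an in-memory file,
/// following the tail layout (..., footer, postscript, psLen byte).
fn read_postscript(file: &[u8]) -> Result<PostScript, prost::DecodeError> {
    let ps_len = file[file.len() - 1] as usize; // last byte holds psLen
    let ps_end = file.len() - 1;
    PostScript::decode(&file[ps_end - ps_len..ps_end])
}
```

From there, `footer_length` and `metadata_length` walk backwards to the (possibly compressed) `Footer` and `Metadata` sections.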