diff --git a/Cargo.toml b/Cargo.toml
index d59a5af68a19..3267bf47dfa4 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -32,6 +32,8 @@ members = [
     "arrow-integration-testing",
     "arrow-ipc",
     "arrow-json",
+    "arrow-orc",
+    "arrow-orc/orc-gen",
     "arrow-ord",
     "arrow-row",
     "arrow-schema",
@@ -86,6 +88,7 @@ arrow-csv = { version = "48.0.0", path = "./arrow-csv" }
 arrow-data = { version = "48.0.0", path = "./arrow-data" }
 arrow-ipc = { version = "48.0.0", path = "./arrow-ipc" }
 arrow-json = { version = "48.0.0", path = "./arrow-json" }
+arrow-orc = { version = "48.0.0", path = "./arrow-orc" }
 arrow-ord = { version = "48.0.0", path = "./arrow-ord" }
 arrow-row = { version = "48.0.0", path = "./arrow-row" }
 arrow-schema = { version = "48.0.0", path = "./arrow-schema" }
diff --git a/arrow-orc/Cargo.toml b/arrow-orc/Cargo.toml
new file mode 100644
index 000000000000..aff336de92f1
--- /dev/null
+++ b/arrow-orc/Cargo.toml
@@ -0,0 +1,51 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[package]
+name = "arrow-orc"
+version = { workspace = true }
+description = "Support for parsing ORC format into the Arrow format"
+homepage = { workspace = true }
+repository = { workspace = true }
+authors = { workspace = true }
+license = { workspace = true }
+keywords = { workspace = true }
+include = { workspace = true }
+edition = { workspace = true }
+rust-version = { workspace = true }
+
+[lib]
+name = "arrow_orc"
+path = "src/lib.rs"
+bench = false
+
+[dependencies]
+arrow-array = { workspace = true }
+arrow-buffer = { workspace = true }
+arrow-cast = { workspace = true }
+arrow-data = { workspace = true }
+arrow-schema = { workspace = true }
+
+bytes = { version = "1", default-features = false, features = ["std"] }
+snap = { version = "1.1", default-features = false }
+flate2 = { version = "1.0", default-features = false, features = ["rust_backend"] }
+lz4_flex = { version = "0.11", default-features = false, features = ["std"] }
+zstd = { version = "0.12",  default-features = false }
+lzokay-native = "0.1"
+
+prost = "0.12.1"
+
diff --git a/arrow-orc/orc-gen/Cargo.toml b/arrow-orc/orc-gen/Cargo.toml
new file mode 100644
index 000000000000..a0a6927416fe
--- /dev/null
+++ b/arrow-orc/orc-gen/Cargo.toml
@@ -0,0 +1,32 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[package]
+name = "orc-gen"
+description = "Code generation for arrow-orc"
+version = "0.1.0"
+edition = { workspace = true }
+rust-version = { workspace = true }
+authors = { workspace = true }
+homepage = { workspace = true }
+repository = { workspace = true }
+license = { workspace = true }
+publish = false
+
+
+[dependencies]
+prost-build = { version = "=0.12.1", default-features = false }
diff --git a/arrow-orc/orc-gen/src/main.rs b/arrow-orc/orc-gen/src/main.rs
new file mode 100644
index 000000000000..3a960e8f245e
--- /dev/null
+++ b/arrow-orc/orc-gen/src/main.rs
@@ -0,0 +1,48 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::{
+    fs::{remove_file, OpenOptions},
+    io::{Read, Write},
+};
+
+/// Regenerates `src/proto.rs` from the ORC protobuf definition via `prost-build`.
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    let mut config = prost_build::Config::new();
+    config
+        .out_dir("src/")
+        .compile_well_known_types()
+        .extern_path(".google.protobuf", "::pbjson_types");
+    config.compile_protos(&["../format/orc_proto.proto"], &["../format"])?;
+
+    // load the freshly generated code so that a notice can be placed in front of it
+    let mut generated = String::new();
+    let mut source = OpenOptions::new().read(true).open("src/orc.proto.rs")?;
+    source.read_to_string(&mut generated)?;
+
+    // proto.rs (no period in its name) = auto-generated notice + generated code
+    let mut target = OpenOptions::new()
+        .write(true)
+        .truncate(true)
+        .create(true)
+        .open("src/proto.rs")?;
+    target.write_all(b"// This file was automatically generated through the regen.sh script, and should not be edited.\n\n")?;
+    target.write_all(generated.as_bytes())?;
+    // the intermediate file is redundant now that its contents live in proto.rs
+    remove_file("src/orc.proto.rs")?;
+    Ok(())
+}
diff --git a/arrow-orc/regen.sh b/arrow-orc/regen.sh
new file mode 100755
index 000000000000..f543928ed76c
--- /dev/null
+++ b/arrow-orc/regen.sh
@@ -0,0 +1,21 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+cd "$SCRIPT_DIR" && cargo run --manifest-path orc-gen/Cargo.toml # quoted: the path may contain spaces
diff --git a/arrow-orc/src/array_reader/mod.rs b/arrow-orc/src/array_reader/mod.rs
new file mode 100644
index 000000000000..74ab8b69173c
--- /dev/null
+++ b/arrow-orc/src/array_reader/mod.rs
@@ -0,0 +1,29 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Read ORC file columns as Arrow arrays.
+
+use arrow_array::ArrayRef;
+
+use crate::errors::Result;
+
+pub mod struct_array_reader;
+
+/// Reads successive batches of column data from an ORC file as Arrow arrays.
+pub trait ArrayReader {
+    fn next_batch(&mut self, batch_size: usize) -> Result<Option<ArrayRef>>;
+}
diff --git a/arrow-orc/src/array_reader/struct_array_reader.rs b/arrow-orc/src/array_reader/struct_array_reader.rs
new file mode 100644
index 000000000000..53dd1c462844
--- /dev/null
+++ b/arrow-orc/src/array_reader/struct_array_reader.rs
@@ -0,0 +1,94 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Read Struct Arrays from ORC file column
+
+use std::sync::Arc;
+
+use arrow_array::{Array, ArrayRef, StructArray};
+use arrow_data::ArrayDataBuilder;
+use arrow_schema::DataType;
+
+use crate::errors::Result;
+
+use super::ArrayReader;
+
+/// Builds [`StructArray`]s by pulling one batch from each child reader.
+pub struct StructArrayReader {
+    children: Vec<Box<dyn ArrayReader>>,
+    data_type: DataType,
+}
+
+impl StructArrayReader {
+    /// Creates a reader over `children` that builds arrays of type `data_type`.
+    pub fn new(children: Vec<Box<dyn ArrayReader>>, data_type: DataType) -> Self {
+        Self {
+            children,
+            data_type,
+        }
+    }
+
+    /// Reads the next batch from every child and combines them into a single
+    /// [`StructArray`]; convenient for the root of an ORC file (a Struct is expected).
+    ///
+    /// Returns `Ok(None)` if there are no children or no child produced an array,
+    /// and an error if the children disagree on the batch length.
+    pub fn next_struct_array_batch(
+        &mut self,
+        batch_size: usize,
+    ) -> Result<Option<Arc<StructArray>>> {
+        if self.children.is_empty() {
+            return Ok(None);
+        }
+
+        // stops at the first child that fails, leaving later children untouched
+        let mut batches = Vec::with_capacity(self.children.len());
+        for child in self.children.iter_mut() {
+            batches.push(child.next_batch(batch_size)?);
+        }
+
+        // every child must match the first: same length, or likewise no batch
+        let expected_length = batches[0].as_ref().map(|array| array.len());
+        for batch in &batches {
+            if batch.as_ref().map(|array| array.len()) != expected_length {
+                return Err(general_err!(
+                    "Struct array reader has children with mismatched lengths"
+                ));
+            }
+        }
+
+        let length = match expected_length {
+            Some(length) => length,
+            None => return Ok(None),
+        };
+        // TODO: account for nullability?
+        let child_data: Vec<_> = batches.iter().flatten().map(Array::to_data).collect();
+        let array_data = ArrayDataBuilder::new(self.data_type.clone())
+            .len(length)
+            .child_data(child_data)
+            .build()?;
+        Ok(Some(Arc::new(StructArray::from(array_data))))
+    }
+}
+
+impl ArrayReader for StructArrayReader {
+    /// Same as `next_struct_array_batch`, with the array returned as an [`ArrayRef`].
+    fn next_batch(&mut self, batch_size: usize) -> Result<Option<ArrayRef>> {
+        let batch = self.next_struct_array_batch(batch_size)?;
+        Ok(batch.map(|array| array as ArrayRef))
+    }
+}
diff --git a/arrow-orc/src/decompress.rs b/arrow-orc/src/decompress.rs
new file mode 100644
index 000000000000..b0f8688e0743
--- /dev/null
+++ b/arrow-orc/src/decompress.rs
@@ -0,0 +1,200 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Handle generic decompression of ORC files.
+
+use std::io::Read;
+
+use crate::errors::{OrcError, Result};
+use crate::proto;
+
+/// Supported generic compression types.
+/// Compression block size indicates maximum size of each compression chunk.
+/// No chunk will decompress to larger than this block size.
+// TODO: use compression block size for other variants too
+#[derive(Debug, PartialEq, Eq, Copy, Clone)]
+pub enum CompressionType {
+    Lz4 { compression_block_size: u64 },
+    Lzo,
+    Snappy,
+    Zlib,
+    Zstd,
+}
+
+impl CompressionType {
+    /// Converts the compression settings stored in a postscript; `Ok(None)`
+    /// means that the file is not compressed.
+    ///
+    /// Fails with [`OrcError::Corrupted`] if a compression kind other than `None`
+    /// comes without a block size.
+    pub fn from_proto(
+        value: proto::CompressionKind,
+        compression_block_size: Option<u64>,
+    ) -> Result<Option<Self>> {
+        let compression_type = match (value, compression_block_size) {
+            // a block size is meaningless without compression, so it is ignored if set
+            (proto::CompressionKind::None, _) => return Ok(None),
+            (proto::CompressionKind::Zlib, Some(_)) => Self::Zlib,
+            (proto::CompressionKind::Snappy, Some(_)) => Self::Snappy,
+            (proto::CompressionKind::Lzo, Some(_)) => Self::Lzo,
+            (proto::CompressionKind::Lz4, Some(compression_block_size)) => Self::Lz4 {
+                compression_block_size,
+            },
+            (proto::CompressionKind::Zstd, Some(_)) => Self::Zstd,
+            _ => return Err(OrcError::Corrupted("Invalid compression settings".to_string())),
+        };
+        Ok(Some(compression_type))
+    }
+}
+
+/// Decodes the 3 byte header found at the start of every block of a compressed
+/// ORC stream. Read as a little-endian integer, its lowest bit tells whether the
+/// block holds the original (uncompressed) bytes and the remaining bits give the
+/// length of the block.
+fn decode_header(bytes: [u8; 3]) -> CompressionHeader {
+    let [low, mid, high] = bytes;
+    // pad with a zero high byte so the header can be read as a u32
+    let raw = u32::from_le_bytes([low, mid, high, 0]);
+    // dropping the flag bit leaves the length
+    let block_length = raw >> 1;
+    match raw & 1 {
+        0 => CompressionHeader::Compressed(block_length),
+        _ => CompressionHeader::Original(block_length),
+    }
+}
+
+/// Header decoded from the start of a block: its length and whether it is compressed.
+#[derive(Debug, PartialEq, Eq)]
+enum CompressionHeader {
+    Original(u32), // block stores the original (uncompressed) bytes; value is the length
+    Compressed(u32), // block stores compressed bytes; value is the length
+}
+
+/// [`Read`] adapter that yields the decompressed contents of an ORC stream.
+///
+/// The wrapped reader is expected to supply a sequence of blocks, each preceded
+/// by a 3 byte header giving the length of the block and whether it holds
+/// compressed data or the original bytes.
+pub struct Decompressor<R: Read> {
+    reader: R,
+    decompressed_block: Vec<u8>,
+    block_start_index: usize,
+    compression_type: CompressionType,
+}
+
+impl<R: Read> Decompressor<R> {
+    /// Wraps `reader`, whose compressed blocks use `compression_type`.
+    pub fn new(reader: R, compression_type: CompressionType) -> Self {
+        Self {
+            reader,
+            decompressed_block: vec![],
+            block_start_index: 0,
+            compression_type,
+        }
+    }
+
+    /// Replaces the contents of `decompressed_block` with the decompressed
+    /// form of `compressed_block`.
+    fn process_compressed_block(&mut self, compressed_block: &[u8]) -> Result<()> {
+        let output = &mut self.decompressed_block;
+        output.clear();
+        match self.compression_type {
+            CompressionType::Lzo => {
+                output.extend(lzokay_native::decompress_all(compressed_block, None)?);
+            }
+            CompressionType::Lz4 {
+                compression_block_size,
+            } => {
+                let max_size = compression_block_size as usize;
+                output.extend(lz4_flex::block::decompress(compressed_block, max_size)?);
+            }
+            CompressionType::Snappy => {
+                output.resize(snap::raw::decompress_len(compressed_block)?, 0);
+                snap::raw::Decoder::new().decompress(compressed_block, output)?;
+            }
+            CompressionType::Zlib => {
+                let mut decoder = flate2::read::DeflateDecoder::new(compressed_block);
+                decoder.read_to_end(output)?;
+            }
+            CompressionType::Zstd => {
+                zstd::stream::copy_decode(compressed_block, output)?;
+            }
+        }
+        Ok(())
+    }
+}
+
+impl<R: Read> Read for Decompressor<R> {
+    /// Copies decompressed bytes into `buf`, decoding the next block from the
+    /// underlying reader once the current block has been handed out in full.
+    /// Empty blocks are skipped, so `Ok(0)` is only returned for an empty `buf`
+    /// or an exhausted reader.
+    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
+        if buf.is_empty() {
+            return Ok(0);
+        }
+
+        // keep fetching blocks until one has bytes to hand out
+        while self.block_start_index >= self.decompressed_block.len() {
+            let mut header = [0; 3];
+            let size = self.reader.read(&mut header[..1])?;
+            if size == 0 {
+                // exhausted
+                return Ok(0);
+            }
+            // otherwise get other header bytes
+            self.reader.read_exact(&mut header[1..])?;
+            match decode_header(header) {
+                CompressionHeader::Original(len) => {
+                    self.decompressed_block.resize(len as usize, 0);
+                    self.reader.read_exact(&mut self.decompressed_block)?;
+                }
+                CompressionHeader::Compressed(len) => {
+                    let mut compressed = vec![0; len as usize];
+                    self.reader.read_exact(&mut compressed)?;
+                    self.process_compressed_block(&compressed)?;
+                }
+            };
+            self.block_start_index = 0;
+        }
+
+        // copy out the decompressed bytes
+        let pending = &self.decompressed_block[self.block_start_index..];
+        let bytes_written = buf.len().min(pending.len());
+        buf[..bytes_written].copy_from_slice(&pending[..bytes_written]);
+        self.block_start_index += bytes_written;
+        Ok(bytes_written)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Checks both header flavours: the lowest bit selects original vs
+    /// compressed and the remaining bits carry the block length.
+    #[test]
+    fn test_decode_header() {
+        let cases = [
+            ([0x40, 0x0d, 0x03], CompressionHeader::Compressed(100_000)),
+            ([0x0b, 0x00, 0x00], CompressionHeader::Original(5)),
+        ];
+        for (header, expected) in cases {
+            assert_eq!(expected, decode_header(header));
+        }
+    }
+}
diff --git a/arrow-orc/src/errors.rs b/arrow-orc/src/errors.rs
new file mode 100644
index 000000000000..e82afdaf3fcc
--- /dev/null
+++ b/arrow-orc/src/errors.rs
@@ -0,0 +1,137 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! ORC errors and related utility macros.
+
+use arrow_schema::ArrowError;
+use prost::DecodeError;
+use std::error::Error;
+use std::{io, result, str};
+
+/// Errors that can occur while reading ORC files.
+// TODO: more specific errors
+#[derive(Debug)]
+pub enum OrcError {
+    /// Generic error
+    General(String),
+    /// When couldn't convert to/from Arrow schema
+    SchemaConversion(String),
+    /// When file doesn't conform to expected spec
+    Corrupted(String),
+    /// Functionality not yet implemented
+    NotYetImplemented(String),
+    /// External error
+    External(Box<dyn Error + Send + Sync>),
+}
+
+impl std::fmt::Display for OrcError {
+    fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
+        let (prefix, detail): (&str, &dyn std::fmt::Display) = match self {
+            Self::General(m) => ("ORC error", m),
+            Self::SchemaConversion(m) => ("ORC schema error", m),
+            Self::Corrupted(m) => ("ORC file out of specification", m),
+            Self::NotYetImplemented(m) => ("ORC feature not yet implemented", m),
+            Self::External(m) => ("External", m),
+        };
+        write!(fmt, "{prefix}: {detail}")
+    }
+}
+
+impl Error for OrcError {
+    fn source(&self) -> Option<&(dyn Error + 'static)> {
+        if let Self::External(e) = self {
+            Some(e.as_ref())
+        } else {
+            None
+        }
+    }
+}
+
+impl From<io::Error> for OrcError {
+    fn from(source: io::Error) -> Self {
+        OrcError::External(source.into())
+    }
+}
+
+impl From<snap::Error> for OrcError {
+    fn from(source: snap::Error) -> Self {
+        OrcError::External(source.into())
+    }
+}
+
+impl From<lzokay_native::Error> for OrcError {
+    fn from(source: lzokay_native::Error) -> Self {
+        OrcError::External(source.into())
+    }
+}
+
+impl From<str::Utf8Error> for OrcError {
+    fn from(source: str::Utf8Error) -> Self {
+        OrcError::External(source.into())
+    }
+}
+
+impl From<lz4_flex::block::DecompressError> for OrcError {
+    fn from(source: lz4_flex::block::DecompressError) -> Self {
+        OrcError::External(source.into())
+    }
+}
+
+impl From<ArrowError> for OrcError {
+    fn from(source: ArrowError) -> Self {
+        OrcError::External(source.into())
+    }
+}
+
+impl From<DecodeError> for OrcError {
+    fn from(source: DecodeError) -> Self {
+        OrcError::External(source.into())
+    }
+}
+
+/// A specialized `Result` for ORC errors.
+pub type Result<T, E = OrcError> = result::Result<T, E>;
+
+// ----------------------------------------------------------------------
+// Convenient macros for different errors
+
+/// Builds an `OrcError::General` from a plain message, or from a format string
+/// followed by its arguments (as accepted by `format!`).
+macro_rules! general_err {
+    ($fmt:expr) => (crate::errors::OrcError::General($fmt.to_owned()));
+    ($fmt:expr, $($args:expr),*) => (crate::errors::OrcError::General(format!($fmt, $($args),*)));
+}
+
+/// Builds an `OrcError::NotYetImplemented`; accepts the same arguments as `general_err!`.
+macro_rules! nyi_err {
+    ($fmt:expr) => (crate::errors::OrcError::NotYetImplemented($fmt.to_owned()));
+    ($fmt:expr, $($args:expr),*) => (crate::errors::OrcError::NotYetImplemented(format!($fmt, $($args),*)));
+}
+
+// ----------------------------------------------------------------------
+// Convert ORC error into other errors
+
+impl From<OrcError> for ArrowError {
+    fn from(error: OrcError) -> Self {
+        ArrowError::OrcError(error.to_string())
+    }
+}
+
+impl From<OrcError> for std::io::Error {
+    fn from(error: OrcError) -> Self {
+        io::Error::new(io::ErrorKind::Other, error)
+    }
+}
diff --git a/arrow-orc/src/file_metadata.rs b/arrow-orc/src/file_metadata.rs
new file mode 100644
index 000000000000..8ea66d808497
--- /dev/null
+++ b/arrow-orc/src/file_metadata.rs
@@ -0,0 +1,174 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Reading ORC file metadata.
+
+use arrow_schema::SchemaRef;
+use prost::Message;
+
+use crate::decompress::{CompressionType, Decompressor};
+use crate::errors::Result;
+use crate::proto;
+use crate::reader::Reader;
+use crate::schema::to_root_schema;
+
+use std::borrow::Cow;
+use std::io::Read;
+
+/// Number of bytes fetched from the end of the file by the initial read.
+/// Ideally enough to contain the postscript, footer and metadata sections.
+const FIRST_READ_BYTES_SIZE: u64 = 16 * 1024;
+
+/// Reads and decodes the metadata located at the tail of an ORC file: the
+/// postscript, followed by the footer and metadata sections it describes.
+///
+/// Fails if the file is empty, if the section lengths recorded in the tail do not
+/// fit the file, or if reading, decompressing or decoding a section fails.
+pub fn parse_metadata<R: Reader>(reader: &mut R) -> Result<OrcMetadata> {
+    let reader_length = reader.len();
+    if reader_length == 0 {
+        return Err(general_err!("Cannot read metadata from empty file"));
+    }
+    // in case file is smaller than expected
+    let first_chunk_size = FIRST_READ_BYTES_SIZE.min(reader_length);
+    let offset = reader_length - first_chunk_size;
+    let bytes = reader.get_bytes(offset, first_chunk_size)?;
+    // postscript length is encoded as single last byte in file (bytes isn't empty)
+    let (bytes, postscript_length_byte) = bytes.split_at(bytes.len() - 1);
+    let postscript_length = postscript_length_byte[0] as usize;
+    // if file is too small for stated postscript section length
+    if postscript_length > bytes.len() {
+        return Err(general_err!("Invalid postscript length"));
+    }
+    // safe split as here we're guaranteed we have enough bytes for the postscript
+    let (bytes, postscript_bytes) = bytes.split_at(bytes.len() - postscript_length);
+    let postscript = proto::PostScript::decode(postscript_bytes)?;
+
+    let compression_type =
+        CompressionType::from_proto(postscript.compression(), postscript.compression_block_size)?;
+    let footer_length = postscript.footer_length();
+    let metadata_length = postscript.metadata_length();
+    // both lengths come from the file, so do not trust their sum to fit in a u64
+    let tail_length = footer_length
+        .checked_add(metadata_length)
+        .ok_or_else(|| general_err!("Invalid footer and metadata lengths"))?;
+
+    let bytes_len = bytes.len() as u64;
+    let bytes = if tail_length > bytes_len {
+        // need to read more bytes as footer + metadata size exceeds initial read chunk
+        let bytes_to_read = tail_length - bytes_len;
+        // the missing bytes must lie between the start of the file and the first chunk
+        let offset = offset
+            .checked_sub(bytes_to_read)
+            .ok_or_else(|| general_err!("Footer and metadata exceed file size"))?;
+        let mut extra_bytes = reader.get_bytes(offset, bytes_to_read)?;
+        extra_bytes.extend_from_slice(bytes);
+        Cow::Owned(extra_bytes)
+    } else {
+        Cow::Borrowed(bytes)
+    };
+
+    // here on we are guaranteed enough bytes for whatever we need
+    let (bytes, footer_bytes) = bytes.split_at(bytes.len() - footer_length as usize);
+    // footer and metadata may be optionally compressed if compression was set in postscript
+    let footer = match compression_type {
+        Some(compression) => {
+            let mut bytes = vec![];
+            Decompressor::new(footer_bytes, compression).read_to_end(&mut bytes)?;
+            proto::Footer::decode(bytes.as_ref())?
+        }
+        None => proto::Footer::decode(footer_bytes)?,
+    };
+
+    let (_, metadata_bytes) = bytes.split_at(bytes.len() - metadata_length as usize);
+    // TODO: make use of metadata for statistics
+    let _metadata = match compression_type {
+        Some(compression) => {
+            let mut bytes = vec![];
+            Decompressor::new(metadata_bytes, compression).read_to_end(&mut bytes)?;
+            proto::Metadata::decode(bytes.as_ref())?
+        }
+        None => proto::Metadata::decode(metadata_bytes)?,
+    };
+
+    Ok(OrcMetadata {
+        compression_type,
+        schema: to_root_schema(&footer.types)?,
+        number_of_rows: footer.number_of_rows(),
+        stripes: footer
+            .stripes
+            .into_iter()
+            .map(StripeInformation::from)
+            .collect(),
+    })
+}
+
+/// General metadata about an entire ORC file, as produced by [`parse_metadata`].
+#[derive(Debug)]
+pub struct OrcMetadata {
+    /// Compression used by the file, or `None` if it is not compressed
+    pub compression_type: Option<CompressionType>,
+    /// Information used for decoding each stripe
+    pub stripes: Vec<StripeInformation>,
+    /// Converted Arrow schema for entire file
+    pub schema: SchemaRef,
+    /// Total number of rows in the file
+    pub number_of_rows: u64,
+}
+
+/// Location and size information for a single stripe, converted from the
+/// protobuf `StripeInformation` message found in the file footer.
+#[derive(Debug, Copy, Clone)]
+pub struct StripeInformation {
+    pub start_offset: u64,
+    pub index_length: u64,
+    pub data_length: u64,
+    pub footer_length: u64,
+    pub number_of_rows: u64,
+}
+
+impl From<proto::StripeInformation> for StripeInformation {
+    fn from(stripe: proto::StripeInformation) -> Self {
+        StripeInformation {
+            start_offset: stripe.offset(),
+            index_length: stripe.index_length(),
+            data_length: stripe.data_length(),
+            footer_length: stripe.footer_length(),
+            number_of_rows: stripe.number_of_rows(),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::fs::File;
+
+    use super::*;
+
+    /// Smoke test: the tail metadata of each sample file parses without error.
+    /// Going by their names, one file is zlib-compressed and one uncompressed.
+    #[test]
+    fn test_parse_metadata() -> Result<()> {
+        let file_names = ["demo-12-zlib.orc", "alltypes.none.orc"];
+        for file_name in file_names {
+            let mut file = File::open(format!("tests/data/{file_name}"))?;
+            let _ = parse_metadata(&mut file)?;
+        }
+
+        Ok(())
+    }
+}
diff --git a/arrow-orc/src/lib.rs b/arrow-orc/src/lib.rs
new file mode 100644
index 000000000000..ecf80adf4d80
--- /dev/null
+++ b/arrow-orc/src/lib.rs
@@ -0,0 +1,37 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! This crate contains the official Native Rust implementation of
+//! [Apache ORC](https://orc.apache.org/), part of the
+//! [Apache Arrow](https://arrow.apache.org/) project.
+//!
+//! # Getting Started
+//! See [sync_reader] for synchronously reading ORC files to Arrow
+//! [`RecordBatch`]es.
+//!
+//! [`RecordBatch`]: arrow_array::RecordBatch
+
+#[macro_use] // keeps any macros defined in `errors` in scope for the modules declared below
+pub mod errors; // must stay first: `#[macro_use]` scoping is textual (declaration order)
+pub mod sync_reader; // public reader entry point referenced by the crate docs above
+
+mod array_reader;
+mod decompress;
+mod file_metadata;
+mod proto; // generated by the regen.sh script; not meant to be edited by hand
+mod reader;
+mod schema;
diff --git a/arrow-orc/src/proto.rs b/arrow-orc/src/proto.rs
new file mode 100644
index 000000000000..07545a68f2fa
--- /dev/null
+++ b/arrow-orc/src/proto.rs
@@ -0,0 +1,812 @@
+// This file was automatically generated through the regen.sh script, and should not be edited.
+
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct IntegerStatistics {
+    #[prost(sint64, optional, tag = "1")]
+    pub minimum: ::core::option::Option<i64>,
+    #[prost(sint64, optional, tag = "2")]
+    pub maximum: ::core::option::Option<i64>,
+    #[prost(sint64, optional, tag = "3")]
+    pub sum: ::core::option::Option<i64>,
+}
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct DoubleStatistics {
+    #[prost(double, optional, tag = "1")]
+    pub minimum: ::core::option::Option<f64>,
+    #[prost(double, optional, tag = "2")]
+    pub maximum: ::core::option::Option<f64>,
+    #[prost(double, optional, tag = "3")]
+    pub sum: ::core::option::Option<f64>,
+}
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct StringStatistics {
+    #[prost(string, optional, tag = "1")]
+    pub minimum: ::core::option::Option<::prost::alloc::string::String>,
+    #[prost(string, optional, tag = "2")]
+    pub maximum: ::core::option::Option<::prost::alloc::string::String>,
+    /// sum will store the total length of all strings in a stripe
+    #[prost(sint64, optional, tag = "3")]
+    pub sum: ::core::option::Option<i64>,
+    /// If the minimum or maximum value was longer than 1024 bytes, store a lower or upper
+    /// bound instead of the minimum or maximum values above.
+    #[prost(string, optional, tag = "4")]
+    pub lower_bound: ::core::option::Option<::prost::alloc::string::String>,
+    #[prost(string, optional, tag = "5")]
+    pub upper_bound: ::core::option::Option<::prost::alloc::string::String>,
+}
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct BucketStatistics {
+    #[prost(uint64, repeated, tag = "1")]
+    pub count: ::prost::alloc::vec::Vec<u64>,
+}
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct DecimalStatistics {
+    #[prost(string, optional, tag = "1")]
+    pub minimum: ::core::option::Option<::prost::alloc::string::String>,
+    #[prost(string, optional, tag = "2")]
+    pub maximum: ::core::option::Option<::prost::alloc::string::String>,
+    #[prost(string, optional, tag = "3")]
+    pub sum: ::core::option::Option<::prost::alloc::string::String>,
+}
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct DateStatistics {
+    /// min,max values saved as days since epoch
+    #[prost(sint32, optional, tag = "1")]
+    pub minimum: ::core::option::Option<i32>,
+    #[prost(sint32, optional, tag = "2")]
+    pub maximum: ::core::option::Option<i32>,
+}
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct TimestampStatistics {
+    /// min,max values saved as milliseconds since epoch
+    #[prost(sint64, optional, tag = "1")]
+    pub minimum: ::core::option::Option<i64>,
+    #[prost(sint64, optional, tag = "2")]
+    pub maximum: ::core::option::Option<i64>,
+    #[prost(sint64, optional, tag = "3")]
+    pub minimum_utc: ::core::option::Option<i64>,
+    #[prost(sint64, optional, tag = "4")]
+    pub maximum_utc: ::core::option::Option<i64>,
+    /// store the lower 6 TS digits for min/max to achieve nanosecond precision
+    #[prost(int32, optional, tag = "5")]
+    pub minimum_nanos: ::core::option::Option<i32>,
+    #[prost(int32, optional, tag = "6")]
+    pub maximum_nanos: ::core::option::Option<i32>,
+}
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct BinaryStatistics {
+    /// sum will store the total binary blob length in a stripe
+    #[prost(sint64, optional, tag = "1")]
+    pub sum: ::core::option::Option<i64>,
+}
+/// Statistics for list and map
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct CollectionStatistics {
+    #[prost(uint64, optional, tag = "1")]
+    pub min_children: ::core::option::Option<u64>,
+    #[prost(uint64, optional, tag = "2")]
+    pub max_children: ::core::option::Option<u64>,
+    #[prost(uint64, optional, tag = "3")]
+    pub total_children: ::core::option::Option<u64>,
+}
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct ColumnStatistics {
+    #[prost(uint64, optional, tag = "1")]
+    pub number_of_values: ::core::option::Option<u64>,
+    #[prost(message, optional, tag = "2")]
+    pub int_statistics: ::core::option::Option<IntegerStatistics>,
+    #[prost(message, optional, tag = "3")]
+    pub double_statistics: ::core::option::Option<DoubleStatistics>,
+    #[prost(message, optional, tag = "4")]
+    pub string_statistics: ::core::option::Option<StringStatistics>,
+    #[prost(message, optional, tag = "5")]
+    pub bucket_statistics: ::core::option::Option<BucketStatistics>,
+    #[prost(message, optional, tag = "6")]
+    pub decimal_statistics: ::core::option::Option<DecimalStatistics>,
+    #[prost(message, optional, tag = "7")]
+    pub date_statistics: ::core::option::Option<DateStatistics>,
+    #[prost(message, optional, tag = "8")]
+    pub binary_statistics: ::core::option::Option<BinaryStatistics>,
+    #[prost(message, optional, tag = "9")]
+    pub timestamp_statistics: ::core::option::Option<TimestampStatistics>,
+    #[prost(bool, optional, tag = "10")]
+    pub has_null: ::core::option::Option<bool>,
+    #[prost(uint64, optional, tag = "11")]
+    pub bytes_on_disk: ::core::option::Option<u64>,
+    #[prost(message, optional, tag = "12")]
+    pub collection_statistics: ::core::option::Option<CollectionStatistics>,
+}
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct RowIndexEntry {
+    #[prost(uint64, repeated, tag = "1")]
+    pub positions: ::prost::alloc::vec::Vec<u64>,
+    #[prost(message, optional, tag = "2")]
+    pub statistics: ::core::option::Option<ColumnStatistics>,
+}
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct RowIndex {
+    #[prost(message, repeated, tag = "1")]
+    pub entry: ::prost::alloc::vec::Vec<RowIndexEntry>,
+}
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct BloomFilter {
+    #[prost(uint32, optional, tag = "1")]
+    pub num_hash_functions: ::core::option::Option<u32>,
+    #[prost(fixed64, repeated, packed = "false", tag = "2")]
+    pub bitset: ::prost::alloc::vec::Vec<u64>,
+    #[prost(bytes = "vec", optional, tag = "3")]
+    pub utf8bitset: ::core::option::Option<::prost::alloc::vec::Vec<u8>>,
+}
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct BloomFilterIndex {
+    #[prost(message, repeated, tag = "1")]
+    pub bloom_filter: ::prost::alloc::vec::Vec<BloomFilter>,
+}
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct Stream {
+    #[prost(enumeration = "stream::Kind", optional, tag = "1")]
+    pub kind: ::core::option::Option<i32>,
+    #[prost(uint32, optional, tag = "2")]
+    pub column: ::core::option::Option<u32>,
+    #[prost(uint64, optional, tag = "3")]
+    pub length: ::core::option::Option<u64>,
+}
+/// Nested message and enum types in `Stream`.
+pub mod stream {
+    /// if you add new index stream kinds, you need to make sure to update
+    /// StreamName to ensure it is added to the stripe in the right area
+    #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)]
+    #[repr(i32)]
+    pub enum Kind {
+        Present = 0,
+        Data = 1,
+        Length = 2,
+        DictionaryData = 3,
+        DictionaryCount = 4,
+        Secondary = 5,
+        RowIndex = 6,
+        BloomFilter = 7,
+        BloomFilterUtf8 = 8,
+        /// Virtual stream kinds to allocate space for encrypted index and data.
+        EncryptedIndex = 9,
+        EncryptedData = 10,
+        /// stripe statistics streams
+        StripeStatistics = 100,
+        /// A virtual stream kind that is used for setting the encryption IV.
+        FileStatistics = 101,
+    }
+    impl Kind {
+        /// String value of the enum field names used in the ProtoBuf definition.
+        ///
+        /// The values are not transformed in any way and thus are considered stable
+        /// (if the ProtoBuf definition does not change) and safe for programmatic use.
+        pub fn as_str_name(&self) -> &'static str {
+            match self {
+                Kind::Present => "PRESENT",
+                Kind::Data => "DATA",
+                Kind::Length => "LENGTH",
+                Kind::DictionaryData => "DICTIONARY_DATA",
+                Kind::DictionaryCount => "DICTIONARY_COUNT",
+                Kind::Secondary => "SECONDARY",
+                Kind::RowIndex => "ROW_INDEX",
+                Kind::BloomFilter => "BLOOM_FILTER",
+                Kind::BloomFilterUtf8 => "BLOOM_FILTER_UTF8",
+                Kind::EncryptedIndex => "ENCRYPTED_INDEX",
+                Kind::EncryptedData => "ENCRYPTED_DATA",
+                Kind::StripeStatistics => "STRIPE_STATISTICS",
+                Kind::FileStatistics => "FILE_STATISTICS",
+            }
+        }
+        /// Creates an enum from field names used in the ProtoBuf definition.
+        pub fn from_str_name(value: &str) -> ::core::option::Option<Self> {
+            match value {
+                "PRESENT" => Some(Self::Present),
+                "DATA" => Some(Self::Data),
+                "LENGTH" => Some(Self::Length),
+                "DICTIONARY_DATA" => Some(Self::DictionaryData),
+                "DICTIONARY_COUNT" => Some(Self::DictionaryCount),
+                "SECONDARY" => Some(Self::Secondary),
+                "ROW_INDEX" => Some(Self::RowIndex),
+                "BLOOM_FILTER" => Some(Self::BloomFilter),
+                "BLOOM_FILTER_UTF8" => Some(Self::BloomFilterUtf8),
+                "ENCRYPTED_INDEX" => Some(Self::EncryptedIndex),
+                "ENCRYPTED_DATA" => Some(Self::EncryptedData),
+                "STRIPE_STATISTICS" => Some(Self::StripeStatistics),
+                "FILE_STATISTICS" => Some(Self::FileStatistics),
+                _ => None,
+            }
+        }
+    }
+}
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct ColumnEncoding {
+    #[prost(enumeration = "column_encoding::Kind", optional, tag = "1")]
+    pub kind: ::core::option::Option<i32>,
+    #[prost(uint32, optional, tag = "2")]
+    pub dictionary_size: ::core::option::Option<u32>,
+    /// The encoding of the bloom filters for this column:
+    ///    0 or missing = none or original
+    ///    1            = ORC-135 (utc for timestamps)
+    #[prost(uint32, optional, tag = "3")]
+    pub bloom_encoding: ::core::option::Option<u32>,
+}
+/// Nested message and enum types in `ColumnEncoding`.
+pub mod column_encoding {
+    #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)]
+    #[repr(i32)]
+    pub enum Kind {
+        Direct = 0,
+        Dictionary = 1,
+        DirectV2 = 2,
+        DictionaryV2 = 3,
+    }
+    impl Kind {
+        /// String value of the enum field names used in the ProtoBuf definition.
+        ///
+        /// The values are not transformed in any way and thus are considered stable
+        /// (if the ProtoBuf definition does not change) and safe for programmatic use.
+        pub fn as_str_name(&self) -> &'static str {
+            match self {
+                Kind::Direct => "DIRECT",
+                Kind::Dictionary => "DICTIONARY",
+                Kind::DirectV2 => "DIRECT_V2",
+                Kind::DictionaryV2 => "DICTIONARY_V2",
+            }
+        }
+        /// Creates an enum from field names used in the ProtoBuf definition.
+        pub fn from_str_name(value: &str) -> ::core::option::Option<Self> {
+            match value {
+                "DIRECT" => Some(Self::Direct),
+                "DICTIONARY" => Some(Self::Dictionary),
+                "DIRECT_V2" => Some(Self::DirectV2),
+                "DICTIONARY_V2" => Some(Self::DictionaryV2),
+                _ => None,
+            }
+        }
+    }
+}
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct StripeEncryptionVariant {
+    #[prost(message, repeated, tag = "1")]
+    pub streams: ::prost::alloc::vec::Vec<Stream>,
+    #[prost(message, repeated, tag = "2")]
+    pub encoding: ::prost::alloc::vec::Vec<ColumnEncoding>,
+}
+// each stripe looks like:
+//    index streams
+//      unencrypted
+//      variant 1..N
+//    data streams
+//      unencrypted
+//      variant 1..N
+//    footer
+
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct StripeFooter {
+    #[prost(message, repeated, tag = "1")]
+    pub streams: ::prost::alloc::vec::Vec<Stream>,
+    #[prost(message, repeated, tag = "2")]
+    pub columns: ::prost::alloc::vec::Vec<ColumnEncoding>,
+    #[prost(string, optional, tag = "3")]
+    pub writer_timezone: ::core::option::Option<::prost::alloc::string::String>,
+    /// one for each column encryption variant
+    #[prost(message, repeated, tag = "4")]
+    pub encryption: ::prost::alloc::vec::Vec<StripeEncryptionVariant>,
+}
+// the file tail looks like:
+//    encrypted stripe statistics: ColumnarStripeStatistics (order by variant)
+//    stripe statistics: Metadata
+//    footer: Footer
+//    postscript: PostScript
+//    psLen: byte
+
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct StringPair {
+    #[prost(string, optional, tag = "1")]
+    pub key: ::core::option::Option<::prost::alloc::string::String>,
+    #[prost(string, optional, tag = "2")]
+    pub value: ::core::option::Option<::prost::alloc::string::String>,
+}
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct Type {
+    #[prost(enumeration = "r#type::Kind", optional, tag = "1")]
+    pub kind: ::core::option::Option<i32>,
+    #[prost(uint32, repeated, tag = "2")]
+    pub subtypes: ::prost::alloc::vec::Vec<u32>,
+    #[prost(string, repeated, tag = "3")]
+    pub field_names: ::prost::alloc::vec::Vec<::prost::alloc::string::String>,
+    #[prost(uint32, optional, tag = "4")]
+    pub maximum_length: ::core::option::Option<u32>,
+    #[prost(uint32, optional, tag = "5")]
+    pub precision: ::core::option::Option<u32>,
+    #[prost(uint32, optional, tag = "6")]
+    pub scale: ::core::option::Option<u32>,
+    #[prost(message, repeated, tag = "7")]
+    pub attributes: ::prost::alloc::vec::Vec<StringPair>,
+}
+/// Nested message and enum types in `Type`.
+pub mod r#type {
+    #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)]
+    #[repr(i32)]
+    pub enum Kind {
+        Boolean = 0,
+        Byte = 1,
+        Short = 2,
+        Int = 3,
+        Long = 4,
+        Float = 5,
+        Double = 6,
+        String = 7,
+        Binary = 8,
+        Timestamp = 9,
+        List = 10,
+        Map = 11,
+        Struct = 12,
+        Union = 13,
+        Decimal = 14,
+        Date = 15,
+        Varchar = 16,
+        Char = 17,
+        TimestampInstant = 18,
+    }
+    impl Kind {
+        /// String value of the enum field names used in the ProtoBuf definition.
+        ///
+        /// The values are not transformed in any way and thus are considered stable
+        /// (if the ProtoBuf definition does not change) and safe for programmatic use.
+        pub fn as_str_name(&self) -> &'static str {
+            match self {
+                Kind::Boolean => "BOOLEAN",
+                Kind::Byte => "BYTE",
+                Kind::Short => "SHORT",
+                Kind::Int => "INT",
+                Kind::Long => "LONG",
+                Kind::Float => "FLOAT",
+                Kind::Double => "DOUBLE",
+                Kind::String => "STRING",
+                Kind::Binary => "BINARY",
+                Kind::Timestamp => "TIMESTAMP",
+                Kind::List => "LIST",
+                Kind::Map => "MAP",
+                Kind::Struct => "STRUCT",
+                Kind::Union => "UNION",
+                Kind::Decimal => "DECIMAL",
+                Kind::Date => "DATE",
+                Kind::Varchar => "VARCHAR",
+                Kind::Char => "CHAR",
+                Kind::TimestampInstant => "TIMESTAMP_INSTANT",
+            }
+        }
+        /// Creates an enum from field names used in the ProtoBuf definition.
+        pub fn from_str_name(value: &str) -> ::core::option::Option<Self> {
+            match value {
+                "BOOLEAN" => Some(Self::Boolean),
+                "BYTE" => Some(Self::Byte),
+                "SHORT" => Some(Self::Short),
+                "INT" => Some(Self::Int),
+                "LONG" => Some(Self::Long),
+                "FLOAT" => Some(Self::Float),
+                "DOUBLE" => Some(Self::Double),
+                "STRING" => Some(Self::String),
+                "BINARY" => Some(Self::Binary),
+                "TIMESTAMP" => Some(Self::Timestamp),
+                "LIST" => Some(Self::List),
+                "MAP" => Some(Self::Map),
+                "STRUCT" => Some(Self::Struct),
+                "UNION" => Some(Self::Union),
+                "DECIMAL" => Some(Self::Decimal),
+                "DATE" => Some(Self::Date),
+                "VARCHAR" => Some(Self::Varchar),
+                "CHAR" => Some(Self::Char),
+                "TIMESTAMP_INSTANT" => Some(Self::TimestampInstant),
+                _ => None,
+            }
+        }
+    }
+}
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct StripeInformation {
+    /// the global file offset of the start of the stripe
+    #[prost(uint64, optional, tag = "1")]
+    pub offset: ::core::option::Option<u64>,
+    /// the number of bytes of index
+    #[prost(uint64, optional, tag = "2")]
+    pub index_length: ::core::option::Option<u64>,
+    /// the number of bytes of data
+    #[prost(uint64, optional, tag = "3")]
+    pub data_length: ::core::option::Option<u64>,
+    /// the number of bytes in the stripe footer
+    #[prost(uint64, optional, tag = "4")]
+    pub footer_length: ::core::option::Option<u64>,
+    /// the number of rows in this stripe
+    #[prost(uint64, optional, tag = "5")]
+    pub number_of_rows: ::core::option::Option<u64>,
+    /// If this is present, the reader should use this value for the encryption
+    /// stripe id for setting the encryption IV. Otherwise, the reader should
+    /// use one larger than the previous stripe's encryptStripeId.
+    /// For unmerged ORC files, the first stripe will use 1 and the rest of the
+    /// stripes won't have it set. For merged files, the stripe information
+    /// will be copied from their original files and thus the first stripe of
+    /// each of the input files will reset it to 1.
+    /// Note that 1 was chosen, because protobuf v3 doesn't serialize
+    /// primitive types that are the default (e.g. 0).
+    #[prost(uint64, optional, tag = "6")]
+    pub encrypt_stripe_id: ::core::option::Option<u64>,
+    /// For each encryption variant, the new encrypted local key to use
+    /// until we find a replacement.
+    #[prost(bytes = "vec", repeated, tag = "7")]
+    pub encrypted_local_keys: ::prost::alloc::vec::Vec<::prost::alloc::vec::Vec<u8>>,
+}
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct UserMetadataItem {
+    #[prost(string, optional, tag = "1")]
+    pub name: ::core::option::Option<::prost::alloc::string::String>,
+    #[prost(bytes = "vec", optional, tag = "2")]
+    pub value: ::core::option::Option<::prost::alloc::vec::Vec<u8>>,
+}
+/// StripeStatistics (one per stripe), each of which contains the
+/// ColumnStatistics for each column.
+/// This message type is only used in ORC v0 and v1.
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct StripeStatistics {
+    #[prost(message, repeated, tag = "1")]
+    pub col_stats: ::prost::alloc::vec::Vec<ColumnStatistics>,
+}
+/// This message type is only used in ORC v0 and v1.
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct Metadata {
+    #[prost(message, repeated, tag = "1")]
+    pub stripe_stats: ::prost::alloc::vec::Vec<StripeStatistics>,
+}
+/// In ORC v2 (and for encrypted columns in v1), each column has
+/// its column statistics written separately.
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct ColumnarStripeStatistics {
+    /// one value for each stripe in the file
+    #[prost(message, repeated, tag = "1")]
+    pub col_stats: ::prost::alloc::vec::Vec<ColumnStatistics>,
+}
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct FileStatistics {
+    #[prost(message, repeated, tag = "1")]
+    pub column: ::prost::alloc::vec::Vec<ColumnStatistics>,
+}
+/// How was the data masked? This isn't necessary for reading the file, but
+/// is documentation about how the file was written.
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct DataMask {
+    /// the kind of masking, which may include third party masks
+    #[prost(string, optional, tag = "1")]
+    pub name: ::core::option::Option<::prost::alloc::string::String>,
+    /// parameters for the mask
+    #[prost(string, repeated, tag = "2")]
+    pub mask_parameters: ::prost::alloc::vec::Vec<::prost::alloc::string::String>,
+    /// the unencrypted column roots this mask was applied to
+    #[prost(uint32, repeated, tag = "3")]
+    pub columns: ::prost::alloc::vec::Vec<u32>,
+}
+/// Information about the encryption keys.
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct EncryptionKey {
+    #[prost(string, optional, tag = "1")]
+    pub key_name: ::core::option::Option<::prost::alloc::string::String>,
+    #[prost(uint32, optional, tag = "2")]
+    pub key_version: ::core::option::Option<u32>,
+    #[prost(enumeration = "EncryptionAlgorithm", optional, tag = "3")]
+    pub algorithm: ::core::option::Option<i32>,
+}
+/// The description of an encryption variant.
+/// Each variant is a single subtype that is encrypted with a single key.
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct EncryptionVariant {
+    /// the column id of the root
+    #[prost(uint32, optional, tag = "1")]
+    pub root: ::core::option::Option<u32>,
+    /// The master key that was used to encrypt the local key, referenced as
+    /// an index into the Encryption.key list.
+    #[prost(uint32, optional, tag = "2")]
+    pub key: ::core::option::Option<u32>,
+    /// the encrypted key for the file footer
+    #[prost(bytes = "vec", optional, tag = "3")]
+    pub encrypted_key: ::core::option::Option<::prost::alloc::vec::Vec<u8>>,
+    /// the stripe statistics for this variant
+    #[prost(message, repeated, tag = "4")]
+    pub stripe_statistics: ::prost::alloc::vec::Vec<Stream>,
+    /// encrypted file statistics as a FileStatistics
+    #[prost(bytes = "vec", optional, tag = "5")]
+    pub file_statistics: ::core::option::Option<::prost::alloc::vec::Vec<u8>>,
+}
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct Encryption {
+    /// all of the masks used in this file
+    #[prost(message, repeated, tag = "1")]
+    pub mask: ::prost::alloc::vec::Vec<DataMask>,
+    /// all of the keys used in this file
+    #[prost(message, repeated, tag = "2")]
+    pub key: ::prost::alloc::vec::Vec<EncryptionKey>,
+    /// The encrypted variants.
+    /// Readers should prefer the first variant for which the user has access
+    /// to the corresponding key. If they don't have access to any of the keys,
+    /// they should get the unencrypted masked data.
+    #[prost(message, repeated, tag = "3")]
+    pub variants: ::prost::alloc::vec::Vec<EncryptionVariant>,
+    /// How are the local keys encrypted?
+    #[prost(enumeration = "KeyProviderKind", optional, tag = "4")]
+    pub key_provider: ::core::option::Option<i32>,
+}
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct Footer {
+    #[prost(uint64, optional, tag = "1")]
+    pub header_length: ::core::option::Option<u64>,
+    #[prost(uint64, optional, tag = "2")]
+    pub content_length: ::core::option::Option<u64>,
+    #[prost(message, repeated, tag = "3")]
+    pub stripes: ::prost::alloc::vec::Vec<StripeInformation>,
+    #[prost(message, repeated, tag = "4")]
+    pub types: ::prost::alloc::vec::Vec<Type>,
+    #[prost(message, repeated, tag = "5")]
+    pub metadata: ::prost::alloc::vec::Vec<UserMetadataItem>,
+    #[prost(uint64, optional, tag = "6")]
+    pub number_of_rows: ::core::option::Option<u64>,
+    #[prost(message, repeated, tag = "7")]
+    pub statistics: ::prost::alloc::vec::Vec<ColumnStatistics>,
+    #[prost(uint32, optional, tag = "8")]
+    pub row_index_stride: ::core::option::Option<u32>,
+    /// Each implementation that writes ORC files should register for a code
+    /// 0 = ORC Java
+    /// 1 = ORC C++
+    /// 2 = Presto
+    /// 3 = Scritchley Go from <https://github.com/scritchley/orc>
+    /// 4 = Trino
+    #[prost(uint32, optional, tag = "9")]
+    pub writer: ::core::option::Option<u32>,
+    /// information about the encryption in this file
+    #[prost(message, optional, tag = "10")]
+    pub encryption: ::core::option::Option<Encryption>,
+    #[prost(enumeration = "CalendarKind", optional, tag = "11")]
+    pub calendar: ::core::option::Option<i32>,
+    /// informative description about the version of the software that wrote
+    /// the file. It is assumed to be within a given writer, so for example
+    /// ORC 1.7.2 = "1.7.2". It may include suffixes, such as "-SNAPSHOT".
+    #[prost(string, optional, tag = "12")]
+    pub software_version: ::core::option::Option<::prost::alloc::string::String>,
+}
+/// Serialized length must be less than 255 bytes
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct PostScript {
+    #[prost(uint64, optional, tag = "1")]
+    pub footer_length: ::core::option::Option<u64>,
+    #[prost(enumeration = "CompressionKind", optional, tag = "2")]
+    pub compression: ::core::option::Option<i32>,
+    #[prost(uint64, optional, tag = "3")]
+    pub compression_block_size: ::core::option::Option<u64>,
+    /// the version of the file format
+    ///    \[0, 11\] = Hive 0.11
+    ///    \[0, 12\] = Hive 0.12
+    #[prost(uint32, repeated, tag = "4")]
+    pub version: ::prost::alloc::vec::Vec<u32>,
+    #[prost(uint64, optional, tag = "5")]
+    pub metadata_length: ::core::option::Option<u64>,
+    /// The version of the writer that wrote the file. This number is
+    /// updated when we make fixes or large changes to the writer so that
+    /// readers can detect whether a given bug is present in the data.
+    ///
+    /// Only the Java ORC writer may use values under 6 (or missing) so that
+    /// readers that predate ORC-202 treat the new writers correctly. Each
+    /// writer should assign their own sequence of versions starting from 6.
+    ///
+    /// Version of the ORC Java writer:
+    ///    0 = original
+    ///    1 = HIVE-8732 fixed (fixed stripe/file maximum statistics &
+    ///                         string statistics use utf8 for min/max)
+    ///    2 = HIVE-4243 fixed (use real column names from Hive tables)
+    ///    3 = HIVE-12055 added (vectorized writer implementation)
+    ///    4 = HIVE-13083 fixed (decimals write present stream correctly)
+    ///    5 = ORC-101 fixed (bloom filters use utf8 consistently)
+    ///    6 = ORC-135 fixed (timestamp statistics use utc)
+    ///    7 = ORC-517 fixed (decimal64 min/max incorrect)
+    ///    8 = ORC-203 added (trim very long string statistics)
+    ///    9 = ORC-14 added (column encryption)
+    ///
+    /// Version of the ORC C++ writer:
+    ///    6 = original
+    ///
+    /// Version of the Presto writer:
+    ///    6 = original
+    ///
+    /// Version of the Scritchley Go writer:
+    ///    6 = original
+    ///
+    /// Version of the Trino writer:
+    ///    6 = original
+    ///
+    #[prost(uint32, optional, tag = "6")]
+    pub writer_version: ::core::option::Option<u32>,
+    /// the number of bytes in the encrypted stripe statistics
+    #[prost(uint64, optional, tag = "7")]
+    pub stripe_statistics_length: ::core::option::Option<u64>,
+    /// Leave this last in the record
+    #[prost(string, optional, tag = "8000")]
+    pub magic: ::core::option::Option<::prost::alloc::string::String>,
+}
+/// The contents of the file tail that must be serialized.
+/// This gets serialized as part of OrcSplit, also used by footer cache.
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct FileTail {
+    #[prost(message, optional, tag = "1")]
+    pub postscript: ::core::option::Option<PostScript>,
+    #[prost(message, optional, tag = "2")]
+    pub footer: ::core::option::Option<Footer>,
+    #[prost(uint64, optional, tag = "3")]
+    pub file_length: ::core::option::Option<u64>,
+    #[prost(uint64, optional, tag = "4")]
+    pub postscript_length: ::core::option::Option<u64>,
+}
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)]
+#[repr(i32)]
+pub enum EncryptionAlgorithm {
+    /// used for detecting future algorithms
+    UnknownEncryption = 0,
+    AesCtr128 = 1,
+    AesCtr256 = 2,
+}
+impl EncryptionAlgorithm {
+    /// String value of the enum field names used in the ProtoBuf definition.
+    ///
+    /// The values are not transformed in any way and thus are considered stable
+    /// (if the ProtoBuf definition does not change) and safe for programmatic use.
+    pub fn as_str_name(&self) -> &'static str {
+        match self {
+            EncryptionAlgorithm::UnknownEncryption => "UNKNOWN_ENCRYPTION",
+            EncryptionAlgorithm::AesCtr128 => "AES_CTR_128",
+            EncryptionAlgorithm::AesCtr256 => "AES_CTR_256",
+        }
+    }
+    /// Creates an enum from field names used in the ProtoBuf definition.
+    pub fn from_str_name(value: &str) -> ::core::option::Option<Self> {
+        match value {
+            "UNKNOWN_ENCRYPTION" => Some(Self::UnknownEncryption),
+            "AES_CTR_128" => Some(Self::AesCtr128),
+            "AES_CTR_256" => Some(Self::AesCtr256),
+            _ => None,
+        }
+    }
+}
+/// Which KeyProvider encrypted the local keys.
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)]
+#[repr(i32)]
+pub enum KeyProviderKind {
+    Unknown = 0,
+    Hadoop = 1,
+    Aws = 2,
+    Gcp = 3,
+    Azure = 4,
+}
+impl KeyProviderKind {
+    /// String value of the enum field names used in the ProtoBuf definition.
+    ///
+    /// The values are not transformed in any way and thus are considered stable
+    /// (if the ProtoBuf definition does not change) and safe for programmatic use.
+    pub fn as_str_name(&self) -> &'static str {
+        match self {
+            KeyProviderKind::Unknown => "UNKNOWN",
+            KeyProviderKind::Hadoop => "HADOOP",
+            KeyProviderKind::Aws => "AWS",
+            KeyProviderKind::Gcp => "GCP",
+            KeyProviderKind::Azure => "AZURE",
+        }
+    }
+    /// Creates an enum from field names used in the ProtoBuf definition.
+    pub fn from_str_name(value: &str) -> ::core::option::Option<Self> {
+        match value {
+            "UNKNOWN" => Some(Self::Unknown),
+            "HADOOP" => Some(Self::Hadoop),
+            "AWS" => Some(Self::Aws),
+            "GCP" => Some(Self::Gcp),
+            "AZURE" => Some(Self::Azure),
+            _ => None,
+        }
+    }
+}
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)]
+#[repr(i32)]
+pub enum CalendarKind {
+    UnknownCalendar = 0,
+    /// A hybrid Julian/Gregorian calendar with a cutover point in October 1582.
+    JulianGregorian = 1,
+    /// A calendar that extends the Gregorian calendar back forever.
+    ProlepticGregorian = 2,
+}
+impl CalendarKind {
+    /// String value of the enum field names used in the ProtoBuf definition.
+    ///
+    /// The values are not transformed in any way and thus are considered stable
+    /// (if the ProtoBuf definition does not change) and safe for programmatic use.
+    pub fn as_str_name(&self) -> &'static str {
+        match self {
+            CalendarKind::UnknownCalendar => "UNKNOWN_CALENDAR",
+            CalendarKind::JulianGregorian => "JULIAN_GREGORIAN",
+            CalendarKind::ProlepticGregorian => "PROLEPTIC_GREGORIAN",
+        }
+    }
+    /// Creates an enum from field names used in the ProtoBuf definition.
+    pub fn from_str_name(value: &str) -> ::core::option::Option<Self> {
+        match value {
+            "UNKNOWN_CALENDAR" => Some(Self::UnknownCalendar),
+            "JULIAN_GREGORIAN" => Some(Self::JulianGregorian),
+            "PROLEPTIC_GREGORIAN" => Some(Self::ProlepticGregorian),
+            _ => None,
+        }
+    }
+}
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)]
+#[repr(i32)]
+pub enum CompressionKind {
+    None = 0,
+    Zlib = 1,
+    Snappy = 2,
+    Lzo = 3,
+    Lz4 = 4,
+    Zstd = 5,
+}
+impl CompressionKind {
+    /// String value of the enum field names used in the ProtoBuf definition.
+    ///
+    /// The values are not transformed in any way and thus are considered stable
+    /// (if the ProtoBuf definition does not change) and safe for programmatic use.
+    pub fn as_str_name(&self) -> &'static str {
+        match self {
+            CompressionKind::None => "NONE",
+            CompressionKind::Zlib => "ZLIB",
+            CompressionKind::Snappy => "SNAPPY",
+            CompressionKind::Lzo => "LZO",
+            CompressionKind::Lz4 => "LZ4",
+            CompressionKind::Zstd => "ZSTD",
+        }
+    }
+    /// Creates an enum from field names used in the ProtoBuf definition.
+    pub fn from_str_name(value: &str) -> ::core::option::Option<Self> {
+        match value {
+            "NONE" => Some(Self::None),
+            "ZLIB" => Some(Self::Zlib),
+            "SNAPPY" => Some(Self::Snappy),
+            "LZO" => Some(Self::Lzo),
+            "LZ4" => Some(Self::Lz4),
+            "ZSTD" => Some(Self::Zstd),
+            _ => None,
+        }
+    }
+}
diff --git a/arrow-orc/src/reader.rs b/arrow-orc/src/reader.rs
new file mode 100644
index 000000000000..da25ab624d6a
--- /dev/null
+++ b/arrow-orc/src/reader.rs
@@ -0,0 +1,62 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Traits that abstract reading bytes from a source
+
+use std::{
+    fs::File,
+    io::{BufReader, Read, Seek, SeekFrom},
+};
+
+/// Primary source used for reading required bytes for operations.
+pub trait Reader {
+    type T: Read;
+
+    /// Get the total length of the source in bytes. Useful for parsing the
+    /// metadata located at the end of the file.
+    fn len(&self) -> u64;
+
+    /// Get a reader positioned `offset_from_start` bytes from the start of the source.
+    fn get_read(&self, offset_from_start: u64) -> std::io::Result<Self::T>;
+
+    /// Read exactly `length` bytes from `offset_from_start`; errors if the source ends early.
+    fn get_bytes(&self, offset_from_start: u64, length: u64) -> std::io::Result<Vec<u8>> {
+        let length = usize::try_from(length)
+            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidInput, e))?;
+        let mut bytes = vec![0; length];
+        self.get_read(offset_from_start)?.read_exact(&mut bytes)?;
+        Ok(bytes)
+    }
+}
+
+impl Reader for File {
+    type T = BufReader<File>;
+
+    fn len(&self) -> u64 {
+        self.metadata().map(|m| m.len()).unwrap_or(0u64) // 0 when metadata is unavailable
+    }
+
+    /// Care needs to be taken when using this simultaneously, as the cloned
+    /// handle shares its cursor with `self` and is affected by other invocations.
+    ///
+    /// See [`File::try_clone()`] for more details.
+    fn get_read(&self, offset_from_start: u64) -> std::io::Result<Self::T> {
+        let mut reader = self.try_clone()?;
+        reader.seek(SeekFrom::Start(offset_from_start))?;
+        Ok(BufReader::new(reader)) // wrap the handle that was just positioned
+    }
+}
diff --git a/arrow-orc/src/schema.rs b/arrow-orc/src/schema.rs
new file mode 100644
index 000000000000..428f1c303bf2
--- /dev/null
+++ b/arrow-orc/src/schema.rs
@@ -0,0 +1,144 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Convert ORC schema to Arrow schema
+
+use std::sync::Arc;
+
+use arrow_schema::{DataType, Field, Fields, Schema, SchemaRef};
+
+use crate::errors::{OrcError, Result};
+use crate::proto;
+
+/// Build the root Arrow [`Schema`] from a list of ORC types.
+///
+/// The first entry of `types` must be a Struct; its children become the
+/// top-level fields of the returned schema.
+///
+/// Returns an error if `types` is empty, the root is not a Struct, or an
+/// unsupported data type is found.
+pub fn to_root_schema(types: &[proto::Type]) -> Result<SchemaRef> {
+    // The root type is always the first entry of the list.
+    let root_type = types
+        .first()
+        .ok_or_else(|| OrcError::SchemaConversion("Empty type list when reading".to_string()))?;
+
+    // Only a Struct can be unpacked into the fields of a schema.
+    match orc_type_to_arrow_type(root_type, types)? {
+        DataType::Struct(fields) => Ok(Arc::new(Schema::new(fields))),
+        other => Err(OrcError::SchemaConversion(format!(
+            "Unexpected root data type when reading: {other}"
+        ))),
+    }
+}
+
+/// Convert a single ORC type into the equivalent Arrow [`DataType`].
+///
+/// `all_types` is the complete type list that `orc_type` belongs to; Struct
+/// children are looked up in it by the indices stored in `subtypes`.
+///
+/// Returns an error for types that are not implemented yet, or if a Struct
+/// is malformed (child index out of bounds, or a different number of field
+/// names and subtypes).
+fn orc_type_to_arrow_type(orc_type: &proto::Type, all_types: &[proto::Type]) -> Result<DataType> {
+    use proto::r#type::Kind;
+    match orc_type.kind() {
+        // Primitive types map one-to-one.
+        Kind::Boolean => Ok(DataType::Boolean),
+        Kind::Byte => Ok(DataType::Int8),
+        Kind::Short => Ok(DataType::Int16),
+        Kind::Int => Ok(DataType::Int32),
+        Kind::Long => Ok(DataType::Int64),
+        Kind::Float => Ok(DataType::Float32),
+        Kind::Double => Ok(DataType::Float64),
+        Kind::Binary => Ok(DataType::Binary),
+        Kind::Date => Ok(DataType::Date32),
+        // Every ORC string flavour is read as plain Utf8.
+        Kind::String | Kind::Varchar | Kind::Char => Ok(DataType::Utf8),
+        // TODO: support the types below. Decimal will need `precision()` and
+        // `scale()`; List, Map and Union will need `subtypes`.
+        Kind::Timestamp => Err(nyi_err!("ORC data type: Timestamp")),
+        Kind::TimestampInstant => Err(nyi_err!("ORC data type: TimestampInstant")),
+        Kind::Decimal => Err(nyi_err!("ORC data type: Decimal")),
+        Kind::List => Err(nyi_err!("ORC data type: List")),
+        Kind::Map => Err(nyi_err!("ORC data type: Map")),
+        Kind::Union => Err(nyi_err!("ORC data type: Union")),
+        Kind::Struct => {
+            let field_names = &orc_type.field_names;
+            let subtypes = &orc_type.subtypes;
+            // `zip` would silently drop the surplus entries, so reject a
+            // malformed Struct up front.
+            if field_names.len() != subtypes.len() {
+                return Err(OrcError::SchemaConversion(format!(
+                    "Struct has {} field names but {} subtypes",
+                    field_names.len(),
+                    subtypes.len()
+                )));
+            }
+            // NOTE(review): a subtype index pointing back at an ancestor would
+            // recurse without bound; nothing here guards against that.
+            let fields = field_names
+                .iter()
+                .zip(subtypes)
+                .map(|(name, &index)| {
+                    all_types
+                        .get(index as usize)
+                        .ok_or_else(|| {
+                            OrcError::SchemaConversion(format!(
+                                "Struct column index out of bounds: {index}"
+                            ))
+                        })
+                        .and_then(|orc_type| orc_type_to_arrow_type(orc_type, all_types))
+                        .map(|dt| Field::new(name, dt, true))
+                })
+                .collect::<Result<Vec<_>>>()?;
+            let fields = Fields::from(fields);
+            Ok(DataType::Struct(fields))
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::proto::r#type::Kind;
+    use crate::proto::Type;
+
+    /// Both failure modes of `to_root_schema` must report the exact message checked here.
+    #[test]
+    fn test_to_root_schema() -> Result<()> {
+        // An empty type list has no root to convert.
+        let empty = to_root_schema(&[]);
+        assert!(empty.is_err());
+        assert_eq!(
+            empty.err().unwrap().to_string(),
+            "ORC schema error: Empty type list when reading"
+        );
+
+        // A root that is anything other than a Struct is rejected.
+        let mut boolean_root = Type::default();
+        boolean_root.set_kind(Kind::Boolean);
+        let non_struct = to_root_schema(&[boolean_root]);
+        assert!(non_struct.is_err());
+        assert_eq!(
+            non_struct.err().unwrap().to_string(),
+            "ORC schema error: Unexpected root data type when reading: Boolean"
+        );
+
+        Ok(())
+    }
+}
diff --git a/arrow-orc/src/sync_reader.rs b/arrow-orc/src/sync_reader.rs
new file mode 100644
index 000000000000..d8b0bf62136d
--- /dev/null
+++ b/arrow-orc/src/sync_reader.rs
@@ -0,0 +1,106 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! ORC synchronous RecordBatch reader
+
+use std::fs::File;
+use std::rc::Rc;
+
+use arrow_array::{RecordBatch, RecordBatchReader};
+use arrow_schema::{ArrowError, DataType, SchemaRef};
+
+use crate::array_reader::struct_array_reader::StructArrayReader;
+use crate::errors::Result;
+use crate::file_metadata::{parse_metadata, OrcMetadata};
+
+/// Parse the metadata of the ORC file behind `reader` and return its Arrow schema.
+pub fn get_orc_file_schema(mut reader: File) -> Result<SchemaRef> {
+    let schema = parse_metadata(&mut reader)?.schema;
+    Ok(schema)
+}
+
+pub struct OrcSyncRecordBatchReader {
+    batch_size: usize, // passed to `next_struct_array_batch` on every `next()` call
+    metadata: Rc<OrcMetadata>, // parsed file metadata; source of `schema()` and the row count
+    array_reader: Box<StructArrayReader>, // produces the struct arrays that become batches
+}
+
+impl Iterator for OrcSyncRecordBatchReader {
+    type Item = Result<RecordBatch, ArrowError>;
+
+    /// Request the next struct array from the array reader; `Ok(None)` from it ends iteration.
+    fn next(&mut self) -> Option<Self::Item> {
+        self.array_reader
+            .next_struct_array_batch(self.batch_size)
+            .map(|batch| batch.map(|array| RecordBatch::from(array.as_ref())))
+            .map_err(Into::into)
+            .transpose()
+    }
+}
+
+impl RecordBatchReader for OrcSyncRecordBatchReader {
+    fn schema(&self) -> SchemaRef {
+        SchemaRef::clone(&self.metadata.schema) // clones the shared handle, not the schema
+    }
+}
+
+impl OrcSyncRecordBatchReader {
+    /// Create a reader with a batch size of 1024 (the derived default of 0 would be rejected).
+    pub fn try_new(reader: File) -> Result<Self> {
+        let options = OrcReaderOptions {
+            batch_size: 1024,
+            ..Default::default()
+        };
+        Self::try_new_with_options(reader, options)
+    }
+
+    pub fn try_new_with_options(mut reader: File, options: OrcReaderOptions) -> Result<Self> {
+        // TODO: introduce a general config checker (in builder?) with own error variant
+        if options.batch_size == 0 {
+            return Err(general_err!("Batch size cannot be 0"));
+        }
+        let metadata = Rc::new(parse_metadata(&mut reader)?);
+        let data_type = DataType::Struct(metadata.schema.fields.clone());
+        // TODO: create child array readers here
+        let struct_array_reader = StructArrayReader::new(vec![], data_type);
+        Ok(Self {
+            batch_size: options.batch_size,
+            metadata,
+            array_reader: Box::new(struct_array_reader),
+        })
+    }
+
+    pub fn total_number_of_rows(&self) -> u64 {
+        self.metadata.number_of_rows // row count recorded in the parsed file metadata
+    }
+}
+
+/// Supported options for customizing behaviour of the reader.
+#[derive(Debug, Clone, Default)]
+pub struct OrcReaderOptions {
+    /// Max size of [`RecordBatch`]es to emit per iteration.
+    /// Note that the emitted batches may be smaller than this limit.
+    ///
+    /// Must be greater than zero; note that the derived `Default` leaves this at 0.
+    pub batch_size: usize,
+    /// If provided, project only the selected columns. Must be same
+    /// length as number of columns, where true marks a projection and
+    /// false will not read the column. Set to `None` to project all columns.
+    ///
+    /// NOTE(review): `try_new_with_options` does not appear to consult this yet.
+    pub projection_mask: Option<Vec<bool>>,
+}
diff --git a/arrow-orc/tests/data/alltypes.lz4.orc b/arrow-orc/tests/data/alltypes.lz4.orc
new file mode 100644
index 000000000000..bdf49a956a0d
Binary files /dev/null and b/arrow-orc/tests/data/alltypes.lz4.orc differ
diff --git a/arrow-orc/tests/data/alltypes.lzo.orc b/arrow-orc/tests/data/alltypes.lzo.orc
new file mode 100644
index 000000000000..f66be95f3f90
Binary files /dev/null and b/arrow-orc/tests/data/alltypes.lzo.orc differ
diff --git a/arrow-orc/tests/data/alltypes.none.orc b/arrow-orc/tests/data/alltypes.none.orc
new file mode 100644
index 000000000000..61ad1300addf
Binary files /dev/null and b/arrow-orc/tests/data/alltypes.none.orc differ
diff --git a/arrow-orc/tests/data/alltypes.snappy.orc b/arrow-orc/tests/data/alltypes.snappy.orc
new file mode 100644
index 000000000000..525650001025
Binary files /dev/null and b/arrow-orc/tests/data/alltypes.snappy.orc differ
diff --git a/arrow-orc/tests/data/alltypes.zlib.orc b/arrow-orc/tests/data/alltypes.zlib.orc
new file mode 100644
index 000000000000..9fabefaf1074
Binary files /dev/null and b/arrow-orc/tests/data/alltypes.zlib.orc differ
diff --git a/arrow-orc/tests/data/alltypes.zstd.orc b/arrow-orc/tests/data/alltypes.zstd.orc
new file mode 100644
index 000000000000..741002f97c86
Binary files /dev/null and b/arrow-orc/tests/data/alltypes.zstd.orc differ
diff --git a/arrow-orc/tests/data/demo-12-zlib.orc b/arrow-orc/tests/data/demo-12-zlib.orc
new file mode 100644
index 000000000000..862dd27af27b
Binary files /dev/null and b/arrow-orc/tests/data/demo-12-zlib.orc differ
diff --git a/arrow-schema/src/error.rs b/arrow-schema/src/error.rs
index 8ea533db89af..2e79eafcc12a 100644
--- a/arrow-schema/src/error.rs
+++ b/arrow-schema/src/error.rs
@@ -39,6 +39,7 @@ pub enum ArrowError {
     IpcError(String),
     InvalidArgumentError(String),
     ParquetError(String),
+    OrcError(String),
     /// Error during import or export to/from the C Data Interface
     CDataInterface(String),
     DictionaryKeyOverflowError,
@@ -93,6 +94,7 @@ impl Display for ArrowError {
             ArrowError::ParquetError(desc) => {
                 write!(f, "Parquet argument error: {desc}")
             }
+            ArrowError::OrcError(desc) => write!(f, "ORC argument error: {desc}"),
             ArrowError::CDataInterface(desc) => {
                 write!(f, "C Data interface error: {desc}")
             }
diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt
index 6f9d1b5f302b..e2012d7c5c31 100644
--- a/dev/release/rat_exclude_files.txt
+++ b/dev/release/rat_exclude_files.txt
@@ -25,3 +25,4 @@ arrow-flight/src/sql/arrow.flight.protocol.sql.rs
 .github/*
 parquet/src/bin/parquet-fromcsv-help.txt
 arrow-flight/examples/data/*
+arrow-orc/src/proto.rs
diff --git a/format/orc_proto.proto b/format/orc_proto.proto
new file mode 100644
index 000000000000..ff05657a5472
--- /dev/null
+++ b/format/orc_proto.proto
@@ -0,0 +1,451 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+syntax = "proto2";
+
+package orc.proto;
+
+option java_package = "org.apache.orc";
+
+message IntegerStatistics  {
+  optional sint64 minimum = 1;
+  optional sint64 maximum = 2;
+  optional sint64 sum = 3;
+}
+
+message DoubleStatistics {
+  optional double minimum = 1;
+  optional double maximum = 2;
+  optional double sum = 3;
+}
+
+message StringStatistics {
+  optional string minimum = 1;
+  optional string maximum = 2;
+  // sum will store the total length of all strings in a stripe
+  optional sint64 sum = 3;
+  // If the minimum or maximum value was longer than 1024 bytes, store a lower or upper
+  // bound instead of the minimum or maximum values above.
+  optional string lowerBound = 4;
+  optional string upperBound = 5;
+}
+
+message BucketStatistics {
+  repeated uint64 count = 1 [packed=true];
+}
+
+message DecimalStatistics {
+  optional string minimum = 1;
+  optional string maximum = 2;
+  optional string sum = 3;
+}
+
+message DateStatistics {
+  // min,max values saved as days since epoch
+  optional sint32 minimum = 1;
+  optional sint32 maximum = 2;
+}
+
+message TimestampStatistics {
+  // min,max values saved as milliseconds since epoch
+  optional sint64 minimum = 1;
+  optional sint64 maximum = 2;
+  optional sint64 minimumUtc = 3;
+  optional sint64 maximumUtc = 4;
+  // store the lower 6 TS digits for min/max to achieve nanosecond precision
+  optional int32 minimumNanos = 5;
+  optional int32 maximumNanos = 6;
+}
+
+message BinaryStatistics {
+  // sum will store the total binary blob length in a stripe
+  optional sint64 sum = 1;
+}
+
+// Statistics for list and map
+message CollectionStatistics {
+  optional uint64 minChildren = 1;
+  optional uint64 maxChildren = 2;
+  optional uint64 totalChildren = 3;
+}
+
+message ColumnStatistics {
+  optional uint64 numberOfValues = 1;
+  optional IntegerStatistics intStatistics = 2;
+  optional DoubleStatistics doubleStatistics = 3;
+  optional StringStatistics stringStatistics = 4;
+  optional BucketStatistics bucketStatistics = 5;
+  optional DecimalStatistics decimalStatistics = 6;
+  optional DateStatistics dateStatistics = 7;
+  optional BinaryStatistics binaryStatistics = 8;
+  optional TimestampStatistics timestampStatistics = 9;
+  optional bool hasNull = 10;
+  optional uint64 bytesOnDisk = 11;
+  optional CollectionStatistics collectionStatistics = 12;
+}
+
+message RowIndexEntry {
+  repeated uint64 positions = 1 [packed=true];
+  optional ColumnStatistics statistics = 2;
+}
+
+message RowIndex {
+  repeated RowIndexEntry entry = 1;
+}
+
+message BloomFilter {
+  optional uint32 numHashFunctions = 1;
+  repeated fixed64 bitset = 2;
+  optional bytes utf8bitset = 3;
+}
+
+message BloomFilterIndex {
+  repeated BloomFilter bloomFilter = 1;
+}
+
+message Stream {
+  // if you add new index stream kinds, you need to make sure to update
+  // StreamName to ensure it is added to the stripe in the right area
+  enum Kind {
+    PRESENT = 0;
+    DATA = 1;
+    LENGTH = 2;
+    DICTIONARY_DATA = 3;
+    DICTIONARY_COUNT = 4;
+    SECONDARY = 5;
+    ROW_INDEX = 6;
+    BLOOM_FILTER = 7;
+    BLOOM_FILTER_UTF8 = 8;
+    // Virtual stream kinds to allocate space for encrypted index and data.
+    ENCRYPTED_INDEX = 9;
+    ENCRYPTED_DATA = 10;
+
+    // stripe statistics streams
+    STRIPE_STATISTICS = 100;
+    // A virtual stream kind that is used for setting the encryption IV.
+    FILE_STATISTICS = 101;
+  }
+  optional Kind kind = 1;
+  optional uint32 column = 2;
+  optional uint64 length = 3;
+}
+
+message ColumnEncoding {
+  enum Kind {
+    DIRECT = 0;
+    DICTIONARY = 1;
+    DIRECT_V2 = 2;
+    DICTIONARY_V2 = 3;
+  }
+  optional Kind kind = 1;
+  optional uint32 dictionarySize = 2;
+
+  // The encoding of the bloom filters for this column:
+  //   0 or missing = none or original
+  //   1            = ORC-135 (utc for timestamps)
+  optional uint32 bloomEncoding = 3;
+}
+
+message StripeEncryptionVariant {
+  repeated Stream streams = 1;
+  repeated ColumnEncoding encoding = 2;
+}
+
+// each stripe looks like:
+//   index streams
+//     unencrypted
+//     variant 1..N
+//   data streams
+//     unencrypted
+//     variant 1..N
+//   footer
+
+message StripeFooter {
+  repeated Stream streams = 1;
+  repeated ColumnEncoding columns = 2;
+  optional string writerTimezone = 3;
+  // one for each column encryption variant
+  repeated StripeEncryptionVariant encryption = 4;
+}
+
+// the file tail looks like:
+//   encrypted stripe statistics: ColumnarStripeStatistics (order by variant)
+//   stripe statistics: Metadata
+//   footer: Footer
+//   postscript: PostScript
+//   psLen: byte
+
+message StringPair {
+  optional string key = 1;
+  optional string value = 2;
+}
+
+message Type {
+  enum Kind {
+    BOOLEAN = 0;
+    BYTE = 1;
+    SHORT = 2;
+    INT = 3;
+    LONG = 4;
+    FLOAT = 5;
+    DOUBLE = 6;
+    STRING = 7;
+    BINARY = 8;
+    TIMESTAMP = 9;
+    LIST = 10;
+    MAP = 11;
+    STRUCT = 12;
+    UNION = 13;
+    DECIMAL = 14;
+    DATE = 15;
+    VARCHAR = 16;
+    CHAR = 17;
+    TIMESTAMP_INSTANT = 18;
+  }
+  optional Kind kind = 1;
+  repeated uint32 subtypes = 2 [packed=true];
+  repeated string fieldNames = 3;
+  optional uint32 maximumLength = 4;
+  optional uint32 precision = 5;
+  optional uint32 scale = 6;
+  repeated StringPair attributes = 7;
+}
+
+message StripeInformation {
+  // the global file offset of the start of the stripe
+  optional uint64 offset = 1;
+  // the number of bytes of index
+  optional uint64 indexLength = 2;
+  // the number of bytes of data
+  optional uint64 dataLength = 3;
+  // the number of bytes in the stripe footer
+  optional uint64 footerLength = 4;
+  // the number of rows in this stripe
+  optional uint64 numberOfRows = 5;
+  // If this is present, the reader should use this value for the encryption
+  // stripe id for setting the encryption IV. Otherwise, the reader should
+  // use one larger than the previous stripe's encryptStripeId.
+  // For unmerged ORC files, the first stripe will use 1 and the rest of the
+  // stripes won't have it set. For merged files, the stripe information
+  // will be copied from their original files and thus the first stripe of
+  // each of the input files will reset it to 1.
+  // Note that 1 was choosen, because protobuf v3 doesn't serialize
+  // primitive types that are the default (eg. 0).
+  optional uint64 encryptStripeId = 6;
+  // For each encryption variant, the new encrypted local key to use
+  // until we find a replacement.
+  repeated bytes encryptedLocalKeys = 7;
+}
+
+message UserMetadataItem {
+  optional string name = 1;
+  optional bytes value = 2;
+}
+
+// StripeStatistics (1 per a stripe), which each contain the
+// ColumnStatistics for each column.
+// This message type is only used in ORC v0 and v1.
+message StripeStatistics {
+  repeated ColumnStatistics colStats = 1;
+}
+
+// This message type is only used in ORC v0 and v1.
+message Metadata {
+  repeated StripeStatistics stripeStats = 1;
+}
+
+// In ORC v2 (and for encrypted columns in v1), each column has
+// their column statistics written separately.
+message ColumnarStripeStatistics {
+  // one value for each stripe in the file
+  repeated ColumnStatistics colStats = 1;
+}
+
+enum EncryptionAlgorithm {
+  UNKNOWN_ENCRYPTION = 0;  // used for detecting future algorithms
+  AES_CTR_128 = 1;
+  AES_CTR_256 = 2;
+}
+
+message FileStatistics {
+  repeated ColumnStatistics column = 1;
+}
+
+// How was the data masked? This isn't necessary for reading the file, but
+// is documentation about how the file was written.
+message DataMask {
+  // the kind of masking, which may include third party masks
+  optional string name = 1;
+  // parameters for the mask
+  repeated string maskParameters = 2;
+  // the unencrypted column roots this mask was applied to
+  repeated uint32 columns = 3 [packed = true];
+}
+
+// Information about the encryption keys.
+message EncryptionKey {
+  optional string keyName = 1;
+  optional uint32 keyVersion = 2;
+  optional EncryptionAlgorithm algorithm = 3;
+}
+
+// The description of an encryption variant.
+// Each variant is a single subtype that is encrypted with a single key.
+message EncryptionVariant {
+  // the column id of the root
+  optional uint32 root = 1;
+  // The master key that was used to encrypt the local key, referenced as
+  // an index into the Encryption.key list.
+  optional uint32 key = 2;
+  // the encrypted key for the file footer
+  optional bytes encryptedKey = 3;
+  // the stripe statistics for this variant
+  repeated Stream stripeStatistics = 4;
+  // encrypted file statistics as a FileStatistics
+  optional bytes fileStatistics = 5;
+}
+
+// Which KeyProvider encrypted the local keys.
+enum KeyProviderKind {
+  UNKNOWN = 0;
+  HADOOP = 1;
+  AWS = 2;
+  GCP = 3;
+  AZURE = 4;
+}
+
+message Encryption {
+  // all of the masks used in this file
+  repeated DataMask mask = 1;
+  // all of the keys used in this file
+  repeated EncryptionKey key = 2;
+  // The encrypted variants.
+  // Readers should prefer the first variant that the user has access to
+  // the corresponding key. If they don't have access to any of the keys,
+  // they should get the unencrypted masked data.
+  repeated EncryptionVariant variants = 3;
+  // How are the local keys encrypted?
+  optional KeyProviderKind keyProvider = 4;
+}
+
+enum CalendarKind {
+  UNKNOWN_CALENDAR = 0;
+   // A hybrid Julian/Gregorian calendar with a cutover point in October 1582.
+  JULIAN_GREGORIAN = 1;
+  // A calendar that extends the Gregorian calendar back forever.
+  PROLEPTIC_GREGORIAN = 2;
+}
+
+message Footer {
+  optional uint64 headerLength = 1;
+  optional uint64 contentLength = 2;
+  repeated StripeInformation stripes = 3;
+  repeated Type types = 4;
+  repeated UserMetadataItem metadata = 5;
+  optional uint64 numberOfRows = 6;
+  repeated ColumnStatistics statistics = 7;
+  optional uint32 rowIndexStride = 8;
+
+  // Each implementation that writes ORC files should register for a code
+  // 0 = ORC Java
+  // 1 = ORC C++
+  // 2 = Presto
+  // 3 = Scritchley Go from https://github.com/scritchley/orc
+  // 4 = Trino
+  optional uint32 writer = 9;
+
+  // information about the encryption in this file
+  optional Encryption encryption = 10;
+  optional CalendarKind calendar = 11;
+
+  // informative description about the version of the software that wrote
+  // the file. It is assumed to be within a given writer, so for example
+  // ORC 1.7.2 = "1.7.2". It may include suffixes, such as "-SNAPSHOT".
+  optional string softwareVersion = 12;
+}
+
+enum CompressionKind {
+  NONE = 0;
+  ZLIB = 1;
+  SNAPPY = 2;
+  LZO = 3;
+  LZ4 = 4;
+  ZSTD = 5;
+}
+
+// Serialized length must be less that 255 bytes
+message PostScript {
+  optional uint64 footerLength = 1;
+  optional CompressionKind compression = 2;
+  optional uint64 compressionBlockSize = 3;
+  // the version of the file format
+  //   [0, 11] = Hive 0.11
+  //   [0, 12] = Hive 0.12
+  repeated uint32 version = 4 [packed = true];
+  optional uint64 metadataLength = 5;
+
+  // The version of the writer that wrote the file. This number is
+  // updated when we make fixes or large changes to the writer so that
+  // readers can detect whether a given bug is present in the data.
+  //
+  // Only the Java ORC writer may use values under 6 (or missing) so that
+  // readers that predate ORC-202 treat the new writers correctly. Each
+  // writer should assign their own sequence of versions starting from 6.
+  //
+  // Version of the ORC Java writer:
+  //   0 = original
+  //   1 = HIVE-8732 fixed (fixed stripe/file maximum statistics &
+  //                        string statistics use utf8 for min/max)
+  //   2 = HIVE-4243 fixed (use real column names from Hive tables)
+  //   3 = HIVE-12055 added (vectorized writer implementation)
+  //   4 = HIVE-13083 fixed (decimals write present stream correctly)
+  //   5 = ORC-101 fixed (bloom filters use utf8 consistently)
+  //   6 = ORC-135 fixed (timestamp statistics use utc)
+  //   7 = ORC-517 fixed (decimal64 min/max incorrect)
+  //   8 = ORC-203 added (trim very long string statistics)
+  //   9 = ORC-14 added (column encryption)
+  //
+  // Version of the ORC C++ writer:
+  //   6 = original
+  //
+  // Version of the Presto writer:
+  //   6 = original
+  //
+  // Version of the Scritchley Go writer:
+  //   6 = original
+  //
+  // Version of the Trino writer:
+  //   6 = original
+  //
+  optional uint32 writerVersion = 6;
+
+  // the number of bytes in the encrypted stripe statistics
+  optional uint64 stripeStatisticsLength = 7;
+
+  // Leave this last in the record
+  optional string magic = 8000;
+}
+
+// The contents of the file tail that must be serialized.
+// This gets serialized as part of OrcSplit, also used by footer cache.
+message FileTail {
+  optional PostScript postscript = 1;
+  optional Footer footer = 2;
+  optional uint64 fileLength = 3;
+  optional uint64 postscriptLength = 4;
+}