From 56419be9c9f8f86f9189f6835466f97f434f1c33 Mon Sep 17 00:00:00 2001 From: Adam Reeve Date: Wed, 18 Dec 2024 14:36:38 +1300 Subject: [PATCH] Tidy up ordinal types --- parquet/src/arrow/arrow_reader/mod.rs | 2 +- parquet/src/encryption/ciphers.rs | 46 +++++++++++---------------- parquet/src/file/serialized_reader.rs | 21 +++++------- 3 files changed, 27 insertions(+), 42 deletions(-) diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs index 9ef142cec7d4..7654ceb0fce7 100644 --- a/parquet/src/arrow/arrow_reader/mod.rs +++ b/parquet/src/arrow/arrow_reader/mod.rs @@ -698,7 +698,7 @@ impl Iterator for ReaderPageIterator { let file_decryptor = Arc::new(self.metadata.file_decryptor().clone().unwrap()); let crypto_context = CryptoContext::new( - rg_idx as i16, self.column_idx as i16, file_decryptor.clone(), file_decryptor); + rg_idx, self.column_idx, file_decryptor.clone(), file_decryptor); let crypto_context = Arc::new(crypto_context); let ret = SerializedPageReader::new(reader, meta, total_rows, page_locations, Some(crypto_context)); diff --git a/parquet/src/encryption/ciphers.rs b/parquet/src/encryption/ciphers.rs index f795da547933..6ecbb003dd5b 100644 --- a/parquet/src/encryption/ciphers.rs +++ b/parquet/src/encryption/ciphers.rs @@ -170,15 +170,15 @@ pub(crate) enum ModuleType { } pub fn create_footer_aad(file_aad: &[u8]) -> Result> { - create_module_aad(file_aad, ModuleType::Footer, -1, -1, None) + create_module_aad(file_aad, ModuleType::Footer, 0, 0, None) } -pub fn create_page_aad(file_aad: &[u8], module_type: ModuleType, row_group_ordinal: i16, column_ordinal: i16, page_ordinal: Option) -> Result> { +pub fn create_page_aad(file_aad: &[u8], module_type: ModuleType, row_group_ordinal: usize, column_ordinal: usize, page_ordinal: Option) -> Result> { create_module_aad(file_aad, module_type, row_group_ordinal, column_ordinal, page_ordinal) } -pub fn create_module_aad(file_aad: &[u8], module_type: ModuleType, row_group_ordinal: i16, - column_ordinal: i16, page_ordinal: Option) -> Result> { +fn create_module_aad(file_aad: &[u8], module_type: ModuleType, row_group_ordinal: usize, + column_ordinal: usize, page_ordinal: Option) -> Result> { let module_buf = [module_type as u8]; @@ -189,19 +189,11 @@ pub fn create_module_aad(file_aad: &[u8], module_type: ModuleType, row_group_ord return Ok(aad) } - if row_group_ordinal < 0 { - return Err(general_err!("Wrong row group ordinal: {}", row_group_ordinal)); - } - // todo: this check is a noop here - if row_group_ordinal > i16::MAX { + if row_group_ordinal > i16::MAX as usize { return Err(general_err!("Encrypted parquet files can't have more than {} row groups: {}", i16::MAX, row_group_ordinal)); } - if column_ordinal < 0 { - return Err(general_err!("Wrong column ordinal: {}", column_ordinal)); - } - // todo: this check is a noop here - if column_ordinal > i16::MAX { + if column_ordinal > i16::MAX as usize { return Err(general_err!("Encrypted parquet files can't have more than {} columns: {}", i16::MAX, column_ordinal)); } @@ -219,16 +211,17 @@ pub fn create_module_aad(file_aad: &[u8], module_type: ModuleType, row_group_ord let page_ordinal = page_ordinal.ok_or_else(|| general_err!( "Page ordinal must be set for data pages"))?; - if page_ordinal < 0 { - return Err(general_err!("Wrong page ordinal: {}", page_ordinal)); + if page_ordinal > i16::MAX as usize { + return Err(general_err!("Encrypted parquet files can't have more than {} pages per column chunk: {}", + i16::MAX, page_ordinal)); } let mut aad = Vec::with_capacity(file_aad.len() + 7); aad.extend_from_slice(file_aad); aad.extend_from_slice(module_buf.as_ref()); - aad.extend_from_slice(row_group_ordinal.to_le_bytes().as_ref()); - aad.extend_from_slice(column_ordinal.to_le_bytes().as_ref()); - aad.extend_from_slice(page_ordinal.to_le_bytes().as_ref()); + aad.extend_from_slice((row_group_ordinal as i16).to_le_bytes().as_ref()); + aad.extend_from_slice((column_ordinal as i16).to_le_bytes().as_ref()); + aad.extend_from_slice((page_ordinal as i16).to_le_bytes().as_ref()); Ok(aad) } @@ -317,17 +310,17 @@ impl FileDecryptor { #[derive(Debug, Clone)] pub struct CryptoContext { - pub(crate) row_group_ordinal: i16, - pub(crate) column_ordinal: i16, - pub(crate) page_ordinal: Option, + pub(crate) row_group_ordinal: usize, + pub(crate) column_ordinal: usize, + pub(crate) page_ordinal: Option, pub(crate) dictionary_page: bool, pub(crate) data_decryptor: Arc, pub(crate) metadata_decryptor: Arc, } impl CryptoContext { - pub fn new(row_group_ordinal: i16, - column_ordinal: i16, data_decryptor: Arc, + pub fn new(row_group_ordinal: usize, + column_ordinal: usize, data_decryptor: Arc, metadata_decryptor: Arc) -> Self { Self { row_group_ordinal, @@ -339,7 +332,7 @@ impl CryptoContext { } } - pub fn with_page_ordinal(&self, page_ordinal: i16) -> Self { + pub fn with_page_ordinal(&self, page_ordinal: usize) -> Self { Self { row_group_ordinal: self.row_group_ordinal, column_ordinal: self.column_ordinal, @@ -361,9 +354,6 @@ impl CryptoContext { } } - pub fn row_group_ordinal(&self) -> &i16 { &self.row_group_ordinal } - pub fn column_ordinal(&self) -> &i16 { &self.column_ordinal } - pub fn page_ordinal(&self) -> &Option { &self.page_ordinal } pub fn data_decryptor(&self) -> Arc { self.data_decryptor.clone()} pub fn metadata_decryptor(&self) -> Arc { self.metadata_decryptor.clone() } } diff --git a/parquet/src/file/serialized_reader.rs b/parquet/src/file/serialized_reader.rs index 427f05c512e0..9891e2c45ffb 100644 --- a/parquet/src/file/serialized_reader.rs +++ b/parquet/src/file/serialized_reader.rs @@ -33,17 +33,14 @@ use crate::file::{ reader::*, statistics, }; -use crate::format::{PageHeader, PageLocation, PageType, FileCryptoMetaData as TFileCryptoMetaData, EncryptionAlgorithm}; +use crate::format::{PageHeader, PageLocation, PageType}; use crate::record::reader::RowIter; use crate::record::Row; use crate::schema::types::Type as SchemaType; use crate::thrift::{TCompactSliceInputProtocol, TSerializable}; use bytes::Bytes; -use num::ToPrimitive; -use thrift::protocol::{TCompactInputProtocol, TInputProtocol}; -use zstd::zstd_safe::WriteBuf; -use crate::data_type::AsBytes; -use crate::encryption::ciphers::{create_page_aad, BlockDecryptor, CryptoContext, FileDecryptionProperties, ModuleType}; +use thrift::protocol::TCompactInputProtocol; +use crate::encryption::ciphers::{create_page_aad, BlockDecryptor, CryptoContext, ModuleType}; impl TryFrom for SerializedFileReader { type Error = ParquetError; @@ -857,14 +854,12 @@ impl PageReader for SerializedPageReader { } fn page_crypto_context(crypto_context: &Option>, page_ordinal: usize, dictionary_page: bool) -> Result>> { - let page_ordinal = page_ordinal - .to_i16() - .ok_or_else(|| general_err!( - "Page ordinal {} is greater than the maximum allowed in encrypted Parquet files ({})", - page_ordinal, i16::MAX))?; - Ok(crypto_context.as_ref().map( - |c| Arc::new(if dictionary_page { c.for_dictionary_page() } else { c.with_page_ordinal(page_ordinal) }))) + |c| Arc::new(if dictionary_page { + c.for_dictionary_page() + } else { + c.with_page_ordinal(page_ordinal) + }))) } #[cfg(test)]