diff --git a/parquet/src/file/writer.rs b/parquet/src/file/writer.rs
index 95ff109a3dd0..b84c57a60e19 100644
--- a/parquet/src/file/writer.rs
+++ b/parquet/src/file/writer.rs
@@ -378,7 +378,12 @@ fn write_bloom_filters(
         .ordinal()
         .expect("Missing row group ordinal")
         .try_into()
-        .expect("Negative row group ordinal");
+        .map_err(|_| {
+            ParquetError::General(format!(
+                "Negative row group ordinal: {}",
+                row_group.ordinal().unwrap()
+            ))
+        })?;
     let row_group_idx = row_group_idx as usize;
     for (column_idx, column_chunk) in row_group.columns_mut().iter_mut().enumerate() {
         if let Some(bloom_filter) = bloom_filters[row_group_idx][column_idx].take() {
@@ -1892,6 +1897,44 @@ mod tests {
         assert_eq!(page_sizes[0], unenc_size);
     }
 
+    #[test]
+    fn test_too_many_rowgroups() {
+        let message_type = "
+            message test_schema {
+                REQUIRED BYTE_ARRAY a (UTF8);
+            }
+        ";
+        let schema = Arc::new(parse_message_type(message_type).unwrap());
+        let file: File = tempfile::tempfile().unwrap();
+        let props = Arc::new(
+            WriterProperties::builder()
+                .set_statistics_enabled(EnabledStatistics::None)
+                .set_max_row_group_size(1)
+                .build(),
+        );
+        let mut writer = SerializedFileWriter::new(&file, schema, props).unwrap();
+
+        // Create 32k empty rowgroups. Should error when i == 32768.
+        for i in 0..0x8001 {
+            match writer.next_row_group() {
+                Ok(mut row_group_writer) => {
+                    assert_ne!(i, 0x8000);
+                    let col_writer = row_group_writer.next_column().unwrap().unwrap();
+                    col_writer.close().unwrap();
+                    row_group_writer.close().unwrap();
+                }
+                Err(e) => {
+                    assert_eq!(i, 0x8000);
+                    assert_eq!(
+                        e.to_string(),
+                        "Parquet error: Parquet does not support more than 32767 row groups per file (currently: 32768)"
+                    );
+                }
+            }
+        }
+        writer.close().unwrap();
+    }
+
     #[test]
     fn test_size_statistics_with_repetition_and_nulls() {
         let message_type = "