Skip to content

Commit f3baa80

Browse files
authored
[thrift-remodel] Add macro to reduce boilerplate necessary to implement Thrift serialization (#8634)
# Which issue does this PR close? We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax. - Part of #5853. # Rationale for this change Structures that are to be Thrift serialized as fields in another struct must implement the `WriteThriftField` trait. In most instances the implementation of that trait is trivial and adds unnecessary verbosity to the code. # What changes are included in this PR? Add a new `write_thrift_field` macro that generates this boilerplate code. # Are these changes tested? Covered by existing tests # Are there any user-facing changes? No
1 parent caeb4d2 commit f3baa80

File tree

5 files changed

+65
-164
lines changed

5 files changed

+65
-164
lines changed

parquet/THRIFT.md

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,32 @@ optional fields it is. A typical `write_thrift` implementation will look like:
406406
}
407407
```
408408

409+
In most instances, the `WriteThriftField` implementation can be handled by the `write_thrift_field`
410+
macro. The first argument is the unqualified name of an object that implements `WriteThrift`, and
411+
the second is the field type (which will be `FieldType::Struct` for Thrift structs and unions,
412+
and `FieldType::I32` for Thrift enums).
413+
414+
```rust
415+
write_thrift_field!(MyNewStruct, FieldType::Struct);
416+
```
417+
418+
which expands to:
419+
420+
```rust
421+
impl WriteThriftField for MyNewStruct {
422+
fn write_thrift_field<W: Write>(
423+
&self,
424+
writer: &mut ThriftCompactOutputProtocol<W>,
425+
field_id: i16,
426+
last_field_id: i16,
427+
) -> Result<i16> {
428+
writer.write_field_begin(FieldType::Struct, field_id, last_field_id)?;
429+
self.write_thrift(writer)?;
430+
Ok(field_id)
431+
}
432+
}
433+
```
434+
409435
### Handling for lists
410436

411437
Lists of serialized objects can usually be read using `parquet_thrift::read_thrift_vec` and written

parquet/src/basic.rs

Lines changed: 5 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ use crate::parquet_thrift::{
3030
ElementType, FieldType, ReadThrift, ThriftCompactInputProtocol, ThriftCompactOutputProtocol,
3131
WriteThrift, WriteThriftField,
3232
};
33-
use crate::{thrift_enum, thrift_struct, thrift_union_all_empty};
33+
use crate::{thrift_enum, thrift_struct, thrift_union_all_empty, write_thrift_field};
3434

3535
use crate::errors::{ParquetError, Result};
3636

@@ -210,18 +210,7 @@ impl WriteThrift for ConvertedType {
210210
}
211211
}
212212

213-
impl WriteThriftField for ConvertedType {
214-
fn write_thrift_field<W: Write>(
215-
&self,
216-
writer: &mut ThriftCompactOutputProtocol<W>,
217-
field_id: i16,
218-
last_field_id: i16,
219-
) -> Result<i16> {
220-
writer.write_field_begin(FieldType::I32, field_id, last_field_id)?;
221-
self.write_thrift(writer)?;
222-
Ok(field_id)
223-
}
224-
}
213+
write_thrift_field!(ConvertedType, FieldType::I32);
225214

226215
// ----------------------------------------------------------------------
227216
// Mirrors thrift union `TimeUnit`
@@ -584,18 +573,7 @@ impl WriteThrift for LogicalType {
584573
}
585574
}
586575

587-
impl WriteThriftField for LogicalType {
588-
fn write_thrift_field<W: Write>(
589-
&self,
590-
writer: &mut ThriftCompactOutputProtocol<W>,
591-
field_id: i16,
592-
last_field_id: i16,
593-
) -> Result<i16> {
594-
writer.write_field_begin(FieldType::Struct, field_id, last_field_id)?;
595-
self.write_thrift(writer)?;
596-
Ok(field_id)
597-
}
598-
}
576+
write_thrift_field!(LogicalType, FieldType::Struct);
599577

600578
// ----------------------------------------------------------------------
601579
// Mirrors thrift enum `FieldRepetitionType`
@@ -928,18 +906,7 @@ impl WriteThrift for Compression {
928906
}
929907
}
930908

931-
impl WriteThriftField for Compression {
932-
fn write_thrift_field<W: Write>(
933-
&self,
934-
writer: &mut ThriftCompactOutputProtocol<W>,
935-
field_id: i16,
936-
last_field_id: i16,
937-
) -> Result<i16> {
938-
writer.write_field_begin(FieldType::I32, field_id, last_field_id)?;
939-
self.write_thrift(writer)?;
940-
Ok(field_id)
941-
}
942-
}
909+
write_thrift_field!(Compression, FieldType::I32);
943910

944911
impl Compression {
945912
/// Returns the codec type of this compression setting as a string, without the compression
@@ -1116,18 +1083,7 @@ impl WriteThrift for EdgeInterpolationAlgorithm {
11161083
}
11171084
}
11181085

1119-
impl WriteThriftField for EdgeInterpolationAlgorithm {
1120-
fn write_thrift_field<W: Write>(
1121-
&self,
1122-
writer: &mut ThriftCompactOutputProtocol<W>,
1123-
field_id: i16,
1124-
last_field_id: i16,
1125-
) -> Result<i16> {
1126-
writer.write_field_begin(FieldType::I32, field_id, last_field_id)?;
1127-
self.write_thrift(writer)?;
1128-
Ok(field_id)
1129-
}
1130-
}
1086+
write_thrift_field!(EdgeInterpolationAlgorithm, FieldType::I32);
11311087

11321088
impl Default for EdgeInterpolationAlgorithm {
11331089
fn default() -> Self {

parquet/src/file/metadata/thrift/mod.rs

Lines changed: 7 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ use crate::{
5858
},
5959
thrift_struct,
6060
util::bit_util::FromBytes,
61+
write_thrift_field,
6162
};
6263

6364
// this needs to be visible to the schema conversion code
@@ -1521,18 +1522,9 @@ impl WriteThrift for crate::geospatial::statistics::GeospatialStatistics {
15211522
}
15221523
}
15231524

1524-
impl WriteThriftField for crate::geospatial::statistics::GeospatialStatistics {
1525-
fn write_thrift_field<W: Write>(
1526-
&self,
1527-
writer: &mut ThriftCompactOutputProtocol<W>,
1528-
field_id: i16,
1529-
last_field_id: i16,
1530-
) -> Result<i16> {
1531-
writer.write_field_begin(FieldType::Struct, field_id, last_field_id)?;
1532-
self.write_thrift(writer)?;
1533-
Ok(field_id)
1534-
}
1535-
}
1525+
// macro cannot handle qualified names
1526+
use crate::geospatial::statistics::GeospatialStatistics as RustGeospatialStatistics;
1527+
write_thrift_field!(RustGeospatialStatistics, FieldType::Struct);
15361528

15371529
// struct BoundingBox {
15381530
// 1: required double xmin;
@@ -1570,18 +1562,9 @@ impl WriteThrift for crate::geospatial::bounding_box::BoundingBox {
15701562
}
15711563
}
15721564

1573-
impl WriteThriftField for crate::geospatial::bounding_box::BoundingBox {
1574-
fn write_thrift_field<W: Write>(
1575-
&self,
1576-
writer: &mut ThriftCompactOutputProtocol<W>,
1577-
field_id: i16,
1578-
last_field_id: i16,
1579-
) -> Result<i16> {
1580-
writer.write_field_begin(FieldType::Struct, field_id, last_field_id)?;
1581-
self.write_thrift(writer)?;
1582-
Ok(field_id)
1583-
}
1584-
}
1565+
// macro cannot handle qualified names
1566+
use crate::geospatial::bounding_box::BoundingBox as RustBoundingBox;
1567+
write_thrift_field!(RustBoundingBox, FieldType::Struct);
15851568

15861569
#[cfg(test)]
15871570
pub(crate) mod tests {

parquet/src/parquet_macros.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,21 @@ macro_rules! thrift_struct {
293293
}
294294
}
295295

296+
#[doc(hidden)]
297+
#[macro_export]
298+
/// Generate `WriteThriftField` implementation for a struct.
299+
macro_rules! write_thrift_field {
300+
($identifier:ident $(< $lt:lifetime >)?, $fld_type:expr) => {
301+
impl $(<$lt>)? WriteThriftField for $identifier $(<$lt>)? {
302+
fn write_thrift_field<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>, field_id: i16, last_field_id: i16) -> Result<i16> {
303+
writer.write_field_begin($fld_type, field_id, last_field_id)?;
304+
self.write_thrift(writer)?;
305+
Ok(field_id)
306+
}
307+
}
308+
}
309+
}
310+
296311
#[doc(hidden)]
297312
#[macro_export]
298313
macro_rules! __thrift_write_required_or_optional_field {

parquet/src/parquet_thrift.rs

Lines changed: 12 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,10 @@ use std::{
3131
io::{Read, Write},
3232
};
3333

34-
use crate::errors::{ParquetError, Result};
34+
use crate::{
35+
errors::{ParquetError, Result},
36+
write_thrift_field,
37+
};
3538

3639
/// Wrapper for thrift `double` fields. This is used to provide
3740
/// an implementation of `Eq` for floats. This implementation
@@ -913,6 +916,7 @@ pub(crate) trait WriteThriftField {
913916
) -> Result<i16>;
914917
}
915918

919+
// bool struct fields are written differently to bool values
916920
impl WriteThriftField for bool {
917921
fn write_thrift_field<W: Write>(
918922
&self,
@@ -929,83 +933,13 @@ impl WriteThriftField for bool {
929933
}
930934
}
931935

932-
impl WriteThriftField for i8 {
933-
fn write_thrift_field<W: Write>(
934-
&self,
935-
writer: &mut ThriftCompactOutputProtocol<W>,
936-
field_id: i16,
937-
last_field_id: i16,
938-
) -> Result<i16> {
939-
writer.write_field_begin(FieldType::Byte, field_id, last_field_id)?;
940-
writer.write_i8(*self)?;
941-
Ok(field_id)
942-
}
943-
}
944-
945-
impl WriteThriftField for i16 {
946-
fn write_thrift_field<W: Write>(
947-
&self,
948-
writer: &mut ThriftCompactOutputProtocol<W>,
949-
field_id: i16,
950-
last_field_id: i16,
951-
) -> Result<i16> {
952-
writer.write_field_begin(FieldType::I16, field_id, last_field_id)?;
953-
writer.write_i16(*self)?;
954-
Ok(field_id)
955-
}
956-
}
957-
958-
impl WriteThriftField for i32 {
959-
fn write_thrift_field<W: Write>(
960-
&self,
961-
writer: &mut ThriftCompactOutputProtocol<W>,
962-
field_id: i16,
963-
last_field_id: i16,
964-
) -> Result<i16> {
965-
writer.write_field_begin(FieldType::I32, field_id, last_field_id)?;
966-
writer.write_i32(*self)?;
967-
Ok(field_id)
968-
}
969-
}
970-
971-
impl WriteThriftField for i64 {
972-
fn write_thrift_field<W: Write>(
973-
&self,
974-
writer: &mut ThriftCompactOutputProtocol<W>,
975-
field_id: i16,
976-
last_field_id: i16,
977-
) -> Result<i16> {
978-
writer.write_field_begin(FieldType::I64, field_id, last_field_id)?;
979-
writer.write_i64(*self)?;
980-
Ok(field_id)
981-
}
982-
}
983-
984-
impl WriteThriftField for OrderedF64 {
985-
fn write_thrift_field<W: Write>(
986-
&self,
987-
writer: &mut ThriftCompactOutputProtocol<W>,
988-
field_id: i16,
989-
last_field_id: i16,
990-
) -> Result<i16> {
991-
writer.write_field_begin(FieldType::Double, field_id, last_field_id)?;
992-
writer.write_double(self.0)?;
993-
Ok(field_id)
994-
}
995-
}
996-
997-
impl WriteThriftField for f64 {
998-
fn write_thrift_field<W: Write>(
999-
&self,
1000-
writer: &mut ThriftCompactOutputProtocol<W>,
1001-
field_id: i16,
1002-
last_field_id: i16,
1003-
) -> Result<i16> {
1004-
writer.write_field_begin(FieldType::Double, field_id, last_field_id)?;
1005-
writer.write_double(*self)?;
1006-
Ok(field_id)
1007-
}
1008-
}
936+
write_thrift_field!(i8, FieldType::Byte);
937+
write_thrift_field!(i16, FieldType::I16);
938+
write_thrift_field!(i32, FieldType::I32);
939+
write_thrift_field!(i64, FieldType::I64);
940+
write_thrift_field!(OrderedF64, FieldType::Double);
941+
write_thrift_field!(f64, FieldType::Double);
942+
write_thrift_field!(String, FieldType::Binary);
1009943

1010944
impl WriteThriftField for &[u8] {
1011945
fn write_thrift_field<W: Write>(
@@ -1033,19 +967,6 @@ impl WriteThriftField for &str {
1033967
}
1034968
}
1035969

1036-
impl WriteThriftField for String {
1037-
fn write_thrift_field<W: Write>(
1038-
&self,
1039-
writer: &mut ThriftCompactOutputProtocol<W>,
1040-
field_id: i16,
1041-
last_field_id: i16,
1042-
) -> Result<i16> {
1043-
writer.write_field_begin(FieldType::Binary, field_id, last_field_id)?;
1044-
writer.write_bytes(self.as_bytes())?;
1045-
Ok(field_id)
1046-
}
1047-
}
1048-
1049970
impl<T> WriteThriftField for Vec<T>
1050971
where
1051972
T: WriteThrift,

0 commit comments

Comments
 (0)