Skip to content

Commit

Permalink
feat: {Field,DataType}::size (#3149)
Browse files Browse the repository at this point in the history
Add a way to calculate in-memory size of `Field` and `DataType`.

Closes #3147.
  • Loading branch information
crepererum authored Nov 22, 2022
1 parent f091cbb commit a110004
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 0 deletions.
50 changes: 50 additions & 0 deletions arrow-schema/src/datatype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,56 @@ impl DataType {
_ => self == other,
}
}

/// Computes the number of bytes attributed to this `DataType`.
///
/// The result is `size_of_val(self)` (so the size of `Self` is included)
/// plus a variant-dependent amount for the payload:
///
/// * variants listed in the final arm contribute nothing extra,
/// * `Timestamp` adds the capacity of its optional string, if present,
/// * list-like and map variants add the `size()` of their child field,
/// * `Struct` / `Union` add one `Field` slot per unit of collection
///   capacity plus whatever each stored field reports beyond its own
///   inline footprint,
/// * `Dictionary` adds the `size()` of both nested data types.
pub fn size(&self) -> usize {
    let payload_bytes = match self {
        DataType::Timestamp(_, tz) => tz.as_ref().map_or(0, |tz| tz.capacity()),
        DataType::List(child)
        | DataType::LargeList(child)
        | DataType::FixedSizeList(child, _)
        | DataType::Map(child, _) => child.size(),
        // NOTE(review): the two trailing `Union` payloads are ignored here;
        // if either owns a heap allocation it is not counted -- confirm.
        DataType::Struct(children) | DataType::Union(children, _, _) => {
            // Each field's inline part is subtracted because it is already
            // covered by the per-slot term below.
            let beyond_inline: usize = children
                .iter()
                .map(|child| child.size() - std::mem::size_of_val(child))
                .sum();
            let slot_bytes = std::mem::size_of::<Field>() * children.capacity();
            beyond_inline + slot_bytes
        }
        DataType::Dictionary(key_type, value_type) => key_type.size() + value_type.size(),
        // Variants for which only the inline size of `Self` is counted.
        DataType::Null
        | DataType::Boolean
        | DataType::Int8
        | DataType::Int16
        | DataType::Int32
        | DataType::Int64
        | DataType::UInt8
        | DataType::UInt16
        | DataType::UInt32
        | DataType::UInt64
        | DataType::Float16
        | DataType::Float32
        | DataType::Float64
        | DataType::Date32
        | DataType::Date64
        | DataType::Time32(_)
        | DataType::Time64(_)
        | DataType::Duration(_)
        | DataType::Interval(_)
        | DataType::Binary
        | DataType::FixedSizeBinary(_)
        | DataType::LargeBinary
        | DataType::Utf8
        | DataType::LargeUtf8
        | DataType::Decimal128(_, _)
        | DataType::Decimal256(_, _) => 0,
    };

    std::mem::size_of_val(self) + payload_bytes
}
}

#[cfg(test)]
Expand Down
15 changes: 15 additions & 0 deletions arrow-schema/src/field.rs
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,21 @@ impl Field {
}
}
}

/// Computes the number of bytes attributed to this `Field`.
///
/// The size of `Self` is included. The total is made up of:
///
/// * the inline size of the field minus the inline size of its
///   `data_type` member,
/// * `data_type.size()`, which counts that member's inline size itself,
/// * the capacity of `name`,
/// * one `(String, String)` slot per unit of `metadata` capacity, and
/// * the capacities of every metadata key and value.
pub fn size(&self) -> usize {
    // `data_type.size()` already includes the inline `DataType`, so take it
    // out of the inline size of `Self` to avoid counting it twice.
    let inline_sans_type =
        std::mem::size_of_val(self) - std::mem::size_of_val(&self.data_type);

    let metadata_slots = std::mem::size_of::<(String, String)>() * self.metadata.capacity();

    let mut metadata_text = 0usize;
    for (key, value) in self.metadata.iter() {
        metadata_text += key.capacity() + value.capacity();
    }

    inline_sans_type
        + self.data_type.size()
        + self.name.capacity()
        + metadata_slots
        + metadata_text
}
}

// TODO: improve display with crate https://crates.io/crates/derive_more ?
Expand Down

0 comments on commit a110004

Please sign in to comment.