Skip to content

Commit

Permalink
Add Hash to vortex-expr and therefore vortex-scalar (#1869)
Browse files Browse the repository at this point in the history
  • Loading branch information
joseph-isaacs authored Jan 9, 2025
1 parent 7c43675 commit b89b82e
Show file tree
Hide file tree
Showing 20 changed files with 76 additions and 43 deletions.
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ datafusion-expr = "44.0.0"
datafusion-physical-expr = "44.0.0"
datafusion-physical-plan = "44.0.0"
divan = "0.1.14"
dyn-hash = "0.2.0"
enum-iterator = "2.0.0"
fastlanes = "0.1.5"
flatbuffers = "24.3.25"
Expand Down
2 changes: 1 addition & 1 deletion vortex-buffer/src/alignment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use vortex_error::VortexExpect;
///
/// This type is a wrapper around `usize` that ensures the alignment is a power of 2 and fits into
/// a `u16`.
#[derive(Clone, Debug, Copy, PartialEq, Eq, PartialOrd, Ord)]
#[derive(Clone, Debug, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Alignment(usize);

impl Alignment {
Expand Down
2 changes: 1 addition & 1 deletion vortex-buffer/src/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use vortex_error::{vortex_panic, VortexExpect};
use crate::{Alignment, BufferMut, ByteBuffer};

/// An immutable buffer of items of `T`.
#[derive(Clone, PartialEq, Eq, PartialOrd)]
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash)]
pub struct Buffer<T> {
pub(crate) bytes: Bytes,
pub(crate) length: usize,
Expand Down
2 changes: 1 addition & 1 deletion vortex-buffer/src/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use std::str::Utf8Error;
use crate::ByteBuffer;

/// A wrapper around a [`ByteBuffer`] that guarantees that the buffer contains valid UTF-8.
#[derive(Clone, PartialEq, Eq, PartialOrd)]
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash)]
pub struct BufferString(ByteBuffer);

impl BufferString {
Expand Down
1 change: 1 addition & 0 deletions vortex-expr/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ workspace = true
[dependencies]
datafusion-expr = { workspace = true, optional = true }
datafusion-physical-expr = { workspace = true, optional = true }
dyn-hash = { workspace = true }
itertools = { workspace = true }
prost = { workspace = true, optional = true }
serde = { workspace = true, optional = true, features = ["derive"] }
Expand Down
6 changes: 3 additions & 3 deletions vortex-expr/src/binary.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use std::any::Any;
use std::fmt::Display;
use std::hash::Hash;
use std::sync::Arc;

use vortex_array::compute::{and_kleene, compare, or_kleene, Operator as ArrayOperator};
Expand All @@ -8,7 +9,8 @@ use vortex_error::VortexResult;

use crate::{ExprRef, Operator, VortexExpr};

#[derive(Debug, Clone)]
#[derive(Debug, Clone, Eq, Hash)]
#[allow(clippy::derived_hash_with_manual_eq)]
pub struct BinaryExpr {
lhs: ExprRef,
operator: Operator,
Expand Down Expand Up @@ -76,8 +78,6 @@ impl PartialEq for BinaryExpr {
}
}

impl Eq for BinaryExpr {}

/// Create a new `BinaryExpr` using the `Eq` operator.
///
/// ## Example usage
Expand Down
4 changes: 2 additions & 2 deletions vortex-expr/src/column.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use std::any::Any;
use std::fmt::Display;
use std::hash::Hash;
use std::sync::Arc;

use vortex_array::{ArrayDType, ArrayData};
Expand All @@ -8,7 +9,7 @@ use vortex_error::{vortex_err, VortexResult};

use crate::{ExprRef, VortexExpr};

#[derive(Debug, PartialEq, Hash, Clone, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Column {
field: Field,
}
Expand Down Expand Up @@ -57,7 +58,6 @@ impl VortexExpr for Column {
fn as_any(&self) -> &dyn Any {
self
}

fn evaluate(&self, batch: &ArrayData) -> VortexResult<ArrayData> {
batch
.as_struct_array()
Expand Down
14 changes: 3 additions & 11 deletions vortex-expr/src/get_item.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use std::any::Any;
use std::fmt::{Debug, Display, Formatter};
use std::hash::Hash;
use std::sync::Arc;

use vortex_array::ArrayData;
Expand All @@ -8,7 +9,8 @@ use vortex_error::{vortex_err, VortexResult};

use crate::{ExprRef, VortexExpr};

#[derive(Debug, Clone, Eq)]
#[derive(Debug, Clone, Eq, Hash)]
#[allow(clippy::derived_hash_with_manual_eq)]
pub struct GetItem {
field: Field,
child: ExprRef,
Expand All @@ -35,15 +37,6 @@ pub fn get_item(field: impl Into<Field>, child: ExprRef) -> ExprRef {
GetItem::new_expr(field, child)
}

impl PartialEq<dyn Any> for GetItem {
fn eq(&self, other: &dyn Any) -> bool {
other
.downcast_ref::<GetItem>()
.map(|item| self.field == item.field && self.child.eq(&item.child))
.unwrap_or(false)
}
}

impl Display for GetItem {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(f, "{}.{}", self.child, self.field)
Expand All @@ -54,7 +47,6 @@ impl VortexExpr for GetItem {
fn as_any(&self) -> &dyn Any {
self
}

fn evaluate(&self, batch: &ArrayData) -> VortexResult<ArrayData> {
let child = self.child.evaluate(batch)?;
child
Expand Down
2 changes: 1 addition & 1 deletion vortex-expr/src/identity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use vortex_error::VortexResult;

use crate::{ExprRef, VortexExpr};

#[derive(Debug, Eq, PartialEq)]
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct Identity;

impl Identity {
Expand Down
6 changes: 5 additions & 1 deletion vortex-expr/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ use std::any::Any;
use std::fmt::{Debug, Display};
use std::sync::Arc;

use dyn_hash::DynHash;

mod binary;
mod column;
pub mod datafusion;
Expand Down Expand Up @@ -42,7 +44,7 @@ use crate::traversal::{Node, ReferenceCollector};
pub type ExprRef = Arc<dyn VortexExpr>;

/// Represents logical operation on [`ArrayData`]s
pub trait VortexExpr: Debug + Send + Sync + DynEq + Display {
pub trait VortexExpr: Debug + Send + Sync + DynEq + DynHash + Display {
/// Convert expression reference to reference of [`Any`] type
fn as_any(&self) -> &dyn Any;

Expand Down Expand Up @@ -108,6 +110,8 @@ impl PartialEq for dyn VortexExpr {

impl Eq for dyn VortexExpr {}

dyn_hash::hash_trait_object!(VortexExpr);

#[cfg(test)]
mod tests {
use vortex_dtype::{DType, Field, Nullability, PType, StructDType};
Expand Down
6 changes: 3 additions & 3 deletions vortex-expr/src/like.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use std::any::Any;
use std::fmt::Display;
use std::hash::Hash;
use std::sync::Arc;

use vortex_array::compute::{like, LikeOptions};
Expand All @@ -8,7 +9,8 @@ use vortex_error::VortexResult;

use crate::{ExprRef, VortexExpr};

#[derive(Debug)]
#[derive(Debug, Eq, Hash)]
#[allow(clippy::derived_hash_with_manual_eq)]
pub struct Like {
child: ExprRef,
pattern: ExprRef,
Expand Down Expand Up @@ -96,8 +98,6 @@ impl PartialEq for Like {
}
}

impl Eq for Like {}

#[cfg(test)]
mod tests {
use vortex_array::array::BoolArray;
Expand Down
2 changes: 1 addition & 1 deletion vortex-expr/src/literal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use vortex_scalar::Scalar;

use crate::{ExprRef, VortexExpr};

#[derive(Debug, PartialEq, Eq)]
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct Literal {
value: Scalar,
}
Expand Down
7 changes: 4 additions & 3 deletions vortex-expr/src/not.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use std::any::Any;
use std::fmt::Display;
use std::hash::Hash;
use std::sync::Arc;

use vortex_array::compute::invert;
Expand All @@ -8,7 +9,9 @@ use vortex_error::VortexResult;

use crate::{ExprRef, VortexExpr};

#[derive(Debug)]
#[derive(Debug, Eq, Hash)]
// We cannot auto derive PartialEq because ExprRef, since its a Arc<..> and derive doesn't work
#[allow(clippy::derived_hash_with_manual_eq)]
pub struct Not {
child: ExprRef,
}
Expand Down Expand Up @@ -56,8 +59,6 @@ impl PartialEq for Not {
}
}

impl Eq for Not {}

pub fn not(operand: ExprRef) -> ExprRef {
Not::new_expr(operand)
}
Expand Down
11 changes: 2 additions & 9 deletions vortex-expr/src/pack.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use std::any::Any;
use std::fmt::Display;
use std::hash::Hash;
use std::sync::Arc;

use itertools::Itertools as _;
Expand Down Expand Up @@ -37,7 +38,7 @@ use crate::{ExprRef, VortexExpr};
/// assert_eq!(scalar_at(&x_copy, 2).unwrap(), Scalar::from(200));
/// ```
///
#[derive(Debug, Clone)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Pack {
names: FieldNames,
values: Vec<ExprRef>,
Expand Down Expand Up @@ -102,14 +103,6 @@ impl VortexExpr for Pack {
}
}

impl PartialEq<Pack> for Pack {
fn eq(&self, other: &Pack) -> bool {
self.names == other.names && self.values == other.values
}
}

impl Eq for Pack {}

#[cfg(test)]
mod tests {
use std::sync::Arc;
Expand Down
2 changes: 1 addition & 1 deletion vortex-expr/src/row_filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use vortex_error::{VortexExpect, VortexResult};

use crate::{expr_project, split_conjunction, ExprRef, VortexExpr};

#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct RowFilter {
pub(crate) conjunction: Vec<ExprRef>,
}
Expand Down
5 changes: 3 additions & 2 deletions vortex-expr/src/select.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,14 @@ use vortex_error::{vortex_err, VortexResult};

use crate::{ExprRef, VortexExpr};

#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum SelectField {
Include(Vec<Field>),
Exclude(Vec<Field>),
}

#[derive(Debug, Clone, Eq)]
#[derive(Debug, Clone, Eq, Hash)]
#[allow(clippy::derived_hash_with_manual_eq)]
pub struct Select {
fields: SelectField,
child: ExprRef,
Expand Down
8 changes: 8 additions & 0 deletions vortex-scalar/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use std::cmp::Ordering;
use std::hash::Hash;
use std::mem::discriminant;
use std::sync::Arc;

Expand Down Expand Up @@ -221,6 +222,13 @@ impl PartialOrd for Scalar {
}
}

impl Hash for Scalar {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
discriminant(self.dtype()).hash(state);
self.value.0.hash(state);
}
}

impl AsRef<Self> for Scalar {
fn as_ref(&self) -> &Self {
self
Expand Down
27 changes: 26 additions & 1 deletion vortex-scalar/src/pvalue.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
use core::fmt::Display;
use std::cmp::Ordering;
use std::hash::{Hash, Hasher};
use std::mem;

use num_traits::NumCast;
use paste::paste;
use vortex_dtype::half::f16;
use vortex_dtype::{NativePType, PType};
use vortex_dtype::{NativePType, PType, ToBytes};
use vortex_error::{vortex_err, VortexError, VortexExpect};

#[derive(Debug, Clone, Copy)]
Expand Down Expand Up @@ -61,6 +62,30 @@ impl PartialOrd for PValue {
}
}

impl Hash for PValue {
fn hash<H: Hasher>(&self, state: &mut H) {
self.to_le_bytes().hash(state);
}
}

impl ToBytes for PValue {
fn to_le_bytes(&self) -> &[u8] {
match self {
PValue::U8(v) => v.to_le_bytes(),
PValue::U16(v) => v.to_le_bytes(),
PValue::U32(v) => v.to_le_bytes(),
PValue::U64(v) => v.to_le_bytes(),
PValue::I8(v) => v.to_le_bytes(),
PValue::I16(v) => v.to_le_bytes(),
PValue::I32(v) => v.to_le_bytes(),
PValue::I64(v) => v.to_le_bytes(),
PValue::F16(v) => v.to_le_bytes(),
PValue::F32(v) => v.to_le_bytes(),
PValue::F64(v) => v.to_le_bytes(),
}
}
}

macro_rules! as_primitive {
($T:ty, $PT:tt) => {
paste! {
Expand Down
4 changes: 2 additions & 2 deletions vortex-scalar/src/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ use crate::pvalue::PValue;
/// Note that these values can be deserialized from JSON or other formats. So a PValue may not
/// have the correct width for what the DType expects. Primitive values should therefore be
/// read using [crate::PrimitiveScalar] which will handle the conversion.
#[derive(Debug, Clone, PartialEq, PartialOrd)]
#[derive(Debug, Clone, PartialEq, PartialOrd, Hash)]
pub struct ScalarValue(pub(crate) InnerScalarValue);

#[derive(Debug, Clone, PartialEq, PartialOrd)]
#[derive(Debug, Clone, PartialEq, PartialOrd, Hash)]
pub(crate) enum InnerScalarValue {
Bool(bool),
Primitive(PValue),
Expand Down

0 comments on commit b89b82e

Please sign in to comment.