Skip to content

Commit

Permalink
feat: support more types for get evaluator for maps (#532)
Browse files Browse the repository at this point in the history
Uses get index kernels to simplify evaluator types. Supports
`Boolean/String/Primitive` types for `get` on maps.

Part of #494
  • Loading branch information
jordanrfrazier authored Jul 21, 2023
1 parent 2cc43e3 commit 1037a91
Show file tree
Hide file tree
Showing 15 changed files with 449 additions and 286 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/sparrow-compiler/src/diagnostics/code.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ SyntaxError(E0011, Error, "Invalid syntax", ""),
CyclicReference(E0012, Error, "Circular dependency", ""),
InvalidOutputType(E0013, Error, "Invalid output type", "https://kaskada.io/docs-site/kaskada/main/fenl/fenl-diagnostic-codes.html#e0013"),
InvalidNonConstArgument(E0014, Error, "Invalid non-constant argument", ""),
IncompatibleArgumentTypes(E0015, Error, "Incompatible argument types", ""),

// Bugs: 1000 - 1999
InternalError(B1000, Bug, "Internal error", ""),
Expand Down
50 changes: 34 additions & 16 deletions crates/sparrow-compiler/src/types/inference.rs
Original file line number Diff line number Diff line change
Expand Up @@ -272,29 +272,26 @@ fn solve_type_class(
}
}

if result.is_none() {
let distinct_arg_types = distinct_arg_types(types);
return Err(DiagnosticCode::IncompatibleArgumentTypes
.builder()
.with_label(
call.location()
.primary_label()
.with_message(format!("Incompatible types for call to '{call}'")),
)
.with_labels(distinct_arg_types));
}

result
// Promote the minimum type to be compatible with the type class, if necessary.
.and_then(|data_type| promote_concrete(data_type, type_class))
// Return an error if either (a) there wasn't a least-upper bound or
// (b) it wasn't possible to promote the least-upper bound to be compatible
// with the type class.
.ok_or_else(|| {
// Only report each distinct type as a problem once. This reduces clutter in the
// error. TODO: Attempt to minimize the number of types involved
// further. Find a subset of types that are compatible, and report
// the problem with the corresponding least-upper-bound and the
// remaining type(s).
let distinct_arg_types: Vec<_> = types
.iter()
.unique_by(|l| l.inner())
.map(|arg_type| {
arg_type
.location()
.secondary_label()
.with_message(format!("Type: {arg_type}"))
})
.collect();

let distinct_arg_types = distinct_arg_types(types);
DiagnosticCode::InvalidArgumentType
.builder()
.with_label(
Expand All @@ -307,6 +304,27 @@ fn solve_type_class(
})
}

/// Build one secondary diagnostic label per distinct argument type.
///
/// Only report each distinct type as a problem once. This reduces clutter in
/// the error. Each label is attached to the location of the argument and
/// carries the message `Type: <type>`.
///
/// TODO: Attempt to minimize the number of types involved further. Find a
/// subset of types that are compatible, and report the problem with the
/// corresponding least-upper-bound and the remaining type(s).
fn distinct_arg_types(
    types: &[Located<FenlType>],
) -> Vec<codespan_reporting::diagnostic::Label<sparrow_syntax::FeatureSetPart>> {
    types
        .iter()
        // Deduplicate on the inner type (ignoring location) so that repeated
        // occurrences of the same type produce a single label.
        .unique_by(|l| l.inner())
        .map(|arg_type| {
            arg_type
                .location()
                .secondary_label()
                .with_message(format!("Type: {arg_type}"))
        })
        .collect()
}

/// Instantiate the (possibly generic) `FenlType` using the computed
/// solutions.
fn instantiate_type(fenl_type: &FenlType, solutions: &HashMap<TypeVariable, FenlType>) -> FenlType {
Expand Down
6 changes: 2 additions & 4 deletions crates/sparrow-compiler/tests/compiler_golden_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -614,16 +614,14 @@ async fn test_incompatible_err() {
.await;

insta::assert_snapshot!(error, @r###"
error[E0010]: Invalid argument type(s)
error[E0015]: Incompatible argument types
--> Query:1:13
|
1 | Sent.amount == "hello"
| ----------- ^^ ------- Type: string
| | |
| | Invalid types for call to 'eq'
| | Incompatible types for call to 'eq'
| Type: f64
|
= Expected 'any'
"###);
}

Expand Down
1 change: 1 addition & 0 deletions crates/sparrow-instructions/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ The instructions supported in Sparrow execution plans.
[dependencies]
anyhow.workspace = true
arrow.workspace = true
arrow-schema.workspace = true
bincode.workspace = true
bit-set.workspace = true
bitvec.workspace = true
Expand Down
24 changes: 4 additions & 20 deletions crates/sparrow-instructions/src/evaluators.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,9 @@ use arrow::datatypes::DataType;
use itertools::Itertools;
use sparrow_plan::{InstKind, InstOp};

use self::macros::create_signed_evaluator;
use self::map::get_string_to_primitive::GetStringToPrimitiveEvaluator;
use crate::evaluators::macros::{
create_float_evaluator, create_map_evaluator, create_number_evaluator,
create_ordered_evaluator, create_string_map_evaluator, create_typed_evaluator,
create_float_evaluator, create_number_evaluator, create_ordered_evaluator,
create_signed_evaluator, create_typed_evaluator,
};
use crate::{ColumnarValue, ComputeStore, GroupingIndices};

Expand All @@ -34,6 +32,7 @@ use field_ref::*;
use general::*;
use json_field::*;
use logical::*;
use map::*;
use math::*;
use record::*;
use sparrow_plan::ValueRef;
Expand Down Expand Up @@ -203,22 +202,7 @@ fn create_simple_evaluator(
)
}
InstOp::Floor => FloorEvaluator::try_new(info),
InstOp::Get => match &info.args[1].data_type {
DataType::Map(f, _) => match f.data_type() {
DataType::Struct(fields) => {
debug_assert!(fields.len() == 2);
// Once we support all types, we can use the `create_typed_evaluator` macro
create_map_evaluator!(
&info.args[0].data_type,
&fields[1].data_type(),
GetStringToPrimitiveEvaluator,
info
)
}
other => panic!("expected struct in map type, saw {:?}", other),
},
other => panic!("expected map, saw {:?}", other),
},
InstOp::Get => GetEvaluator::try_new(info),
InstOp::Gt => match (info.args[0].is_literal(), info.args[1].is_literal()) {
(_, true) => {
create_ordered_evaluator!(&info.args[0].data_type, GtScalarEvaluator, info)
Expand Down
90 changes: 2 additions & 88 deletions crates/sparrow-instructions/src/evaluators/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -297,93 +297,7 @@ macro_rules! create_typed_evaluator {
}};
}

/// Create an evaluator for a map instruction, dispatching on the key type.
///
/// The macro matches `$key_type` against the supported string key types and
/// forwards to `create_string_map_evaluator!` with the matching offset type:
/// `DataType::Utf8` uses `i32` and `DataType::LargeUtf8` uses `i64`. Any
/// other key type evaluates to an `Err(anyhow::anyhow!(..))` describing the
/// unsupported type; the macro itself does not panic.
///
/// # Arguments
///
/// * `$key_type` - expression yielding the Arrow `DataType` of the key being
///   looked up (it is used directly as the `match` scrutinee).
/// * `$value_type` - expression yielding the Arrow `DataType` of the map's
///   values; it is passed through to `create_string_map_evaluator!`.
/// * `$string_evaluator` - identifier of a `struct` that is generic over an
///   offset `O` and a datatype `T` and which implements `Evaluator`.
/// * `$info` - should be a `StaticInfo`, which contains information relevant
///   to creating a new evaluator. It is handed to `try_new`.
///
/// The expansion evaluates to whatever `$string_evaluator::<O, T>::try_new`
/// returns (presumably `anyhow::Result<Box<dyn Evaluator>>` — confirm against
/// the evaluator's `try_new`).
///
/// # Example
///
/// ```no_run
/// struct GetStringToPrimitiveEvaluator<O: OffsetTrait, T: ArrowPrimitiveType>;
/// impl<O: OffsetTrait, T: ArrowPrimitiveType> Evaluator for GetStringToPrimitiveEvaluator<O, T> { ... }
///
/// fn create_evaluator(args: Vec<StaticArg>, result_type: &DataType) -> anyhow::Result<Self> {
/// create_map_evaluator!(key_type, map_value_type, GetStringToPrimitiveEvaluator, info)
/// }
/// ```
macro_rules! create_map_evaluator {
($key_type:expr, $value_type:expr, $string_evaluator:ident, $info:expr) => {{
use arrow::datatypes::*;
// NOTE(review): presumably brings the sibling macro into scope at the
// expansion site — confirm this `use` is still required.
use create_string_map_evaluator;
match $key_type {
// `Utf8` keys: instantiate the evaluator with the `i32` offset type.
DataType::Utf8 => {
create_string_map_evaluator!(i32, $value_type, $string_evaluator, $info)
}
// `LargeUtf8` keys: instantiate the evaluator with the `i64` offset type.
DataType::LargeUtf8 => {
create_string_map_evaluator!(i64, $value_type, $string_evaluator, $info)
}
// Every other key type is reported as an error instead of panicking.
unsupported_type => Err(anyhow::anyhow!(format!(
"unsupported key type {:?} for map evaluator",
unsupported_type
))),
}
}};
}

/// Create an evaluator for a string-keyed map, dispatching on the value type.
///
/// The macro matches `$input_type` (the Arrow `DataType` of the map values)
/// and calls `$evaluator::<$offset_size, T>::try_new($info)` with the Arrow
/// primitive type `T` corresponding to the matched `DataType`.
///
/// Handled value types are exactly those listed in the `match` below:
/// `Int32`, `Int64`, `UInt32`, `UInt64`, `Float32`, `Float64`, `Timestamp`
/// in all four time units (only when the second field is `None`), `Duration`
/// in all four time units, and `Interval` with the `DayTime` or `YearMonth`
/// unit. Every other type falls through to the final arm and evaluates to an
/// `Err(anyhow::anyhow!(..))` that names the unsupported type and the
/// evaluator.
///
/// # Arguments
///
/// * `$offset_size` - identifier of the offset type used as the evaluator's
///   first generic argument (`create_map_evaluator!` passes `i32` or `i64`).
/// * `$input_type` - expression yielding the Arrow `DataType` to dispatch on.
/// * `$evaluator` - identifier of the generic evaluator `struct` to
///   instantiate.
/// * `$info` - value forwarded unchanged to `try_new`.
macro_rules! create_string_map_evaluator {
($offset_size:ident, $input_type:expr, $evaluator:ident, $info:expr) => {{
use arrow::datatypes::*;
match $input_type {
// Integer and floating-point value types.
DataType::Int32 => $evaluator::<$offset_size, Int32Type>::try_new($info),
DataType::Int64 => $evaluator::<$offset_size, Int64Type>::try_new($info),
DataType::UInt32 => $evaluator::<$offset_size, UInt32Type>::try_new($info),
DataType::UInt64 => $evaluator::<$offset_size, UInt64Type>::try_new($info),
DataType::Float32 => $evaluator::<$offset_size, Float32Type>::try_new($info),
DataType::Float64 => $evaluator::<$offset_size, Float64Type>::try_new($info),
// Timestamps: only the variants whose second field is `None` are matched.
DataType::Timestamp(TimeUnit::Second, None) => {
$evaluator::<$offset_size, TimestampSecondType>::try_new($info)
}
DataType::Timestamp(TimeUnit::Millisecond, None) => {
$evaluator::<$offset_size, TimestampMillisecondType>::try_new($info)
}
DataType::Timestamp(TimeUnit::Microsecond, None) => {
$evaluator::<$offset_size, TimestampMicrosecondType>::try_new($info)
}
DataType::Timestamp(TimeUnit::Nanosecond, None) => {
$evaluator::<$offset_size, TimestampNanosecondType>::try_new($info)
}
// Durations in each time unit.
DataType::Duration(TimeUnit::Second) => {
$evaluator::<$offset_size, DurationSecondType>::try_new($info)
}
DataType::Duration(TimeUnit::Millisecond) => {
$evaluator::<$offset_size, DurationMillisecondType>::try_new($info)
}
DataType::Duration(TimeUnit::Microsecond) => {
$evaluator::<$offset_size, DurationMicrosecondType>::try_new($info)
}
DataType::Duration(TimeUnit::Nanosecond) => {
$evaluator::<$offset_size, DurationNanosecondType>::try_new($info)
}
// Intervals: only the `DayTime` and `YearMonth` units are matched.
DataType::Interval(IntervalUnit::DayTime) => {
$evaluator::<$offset_size, IntervalDayTimeType>::try_new($info)
}
DataType::Interval(IntervalUnit::YearMonth) => {
$evaluator::<$offset_size, IntervalYearMonthType>::try_new($info)
}
// Anything else is reported as an error that names the evaluator.
unsupported_type => Err(anyhow::anyhow!(format!(
"Unsupported non-primitive value type {:?} for {}",
unsupported_type,
stringify!($evaluator)
))),
}
}};
}

pub(super) use {
create_float_evaluator, create_map_evaluator, create_number_evaluator,
create_ordered_evaluator, create_signed_evaluator, create_string_map_evaluator,
create_typed_evaluator,
create_float_evaluator, create_number_evaluator, create_ordered_evaluator,
create_signed_evaluator, create_typed_evaluator,
};
3 changes: 2 additions & 1 deletion crates/sparrow-instructions/src/evaluators/map.rs
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
pub(super) mod get_string_to_primitive;
mod get;
pub(super) use get::*;
Loading

0 comments on commit 1037a91

Please sign in to comment.