-
Notifications
You must be signed in to change notification settings - Fork 1.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Introduce `TypeSignature::Comparable` and update `NullIf` signature
#13356
Changes from 6 commits
c6ef8a5
ffd02c4
784018f
ca87f2c
95f09c4
60496e5
8d0b73f
e4316a2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,7 +18,7 @@ | |
//! Signature module contains foundational types that are used to represent signatures, types, | ||
//! and return types of functions in DataFusion. | ||
|
||
use crate::type_coercion::aggregates::{NUMERICS, STRINGS}; | ||
use crate::type_coercion::aggregates::NUMERICS; | ||
use arrow::datatypes::DataType; | ||
use datafusion_common::types::{LogicalTypeRef, NativeType}; | ||
use itertools::Itertools; | ||
|
@@ -113,6 +113,8 @@ pub enum TypeSignature { | |
/// arguments like `vec![DataType::Int32]` or `vec![DataType::Float32]` | ||
/// since i32 and f32 can be casted to f64 | ||
Coercible(Vec<LogicalTypeRef>), | ||
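/// A fixed number of arguments (the `usize`) whose types are coerced to a single common type | ||
/// by folding the comparison coercion rules (`comparison_coercion_numeric`) over the arguments. | ||
/// | ||
/// Note: | ||
/// - When a numeric type is compared with a string, the numeric type is preferred, e.g. the | ||
///   arguments of `nullif('2', 2)` are coerced to an integer type. | ||
/// - If the resulting common type is `Null`, it is converted to a string type (`Utf8View`). | ||
/// - An error is returned if any pair of argument types has no common comparable type. | ||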
Comparable(usize), | ||
/// Fixed number of arguments of arbitrary types, number should be larger than 0 | ||
Any(usize), | ||
/// Matches exactly one of a list of [`TypeSignature`]s. Coercion is attempted to match | ||
|
@@ -138,6 +140,13 @@ pub enum TypeSignature { | |
NullAry, | ||
} | ||
|
||
impl TypeSignature { | ||
/// Returns `true` if this signature is the [`TypeSignature::OneOf`] variant, `false` for every other variant. | ||
#[inline] | ||
pub fn is_one_of(&self) -> bool { | ||
matches!(self, TypeSignature::OneOf(_)) | ||
} | ||
} | ||
|
||
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)] | ||
pub enum ArrayFunctionSignature { | ||
/// Specialized Signature for ArrayAppend and similar functions | ||
|
@@ -210,6 +219,9 @@ impl TypeSignature { | |
TypeSignature::Numeric(num) => { | ||
vec![format!("Numeric({num})")] | ||
} | ||
TypeSignature::Comparable(num) => { | ||
vec![format!("Comparable({num})")] | ||
} | ||
TypeSignature::Coercible(types) => { | ||
vec![Self::join_types(types, ", ")] | ||
} | ||
|
@@ -284,13 +296,13 @@ impl TypeSignature { | |
.cloned() | ||
.map(|numeric_type| vec![numeric_type; *arg_count]) | ||
.collect(), | ||
TypeSignature::String(arg_count) => STRINGS | ||
.iter() | ||
.cloned() | ||
.map(|string_type| vec![string_type; *arg_count]) | ||
.collect(), | ||
TypeSignature::String(arg_count) => get_data_types(&NativeType::String) | ||
.into_iter() | ||
.map(|dt| vec![dt; *arg_count]) | ||
.collect::<Vec<_>>(), | ||
Comment on lines
+306
to
+309
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 👍 |
||
// TODO: Implement for other types | ||
TypeSignature::Any(_) | ||
| TypeSignature::Comparable(_) | ||
| TypeSignature::NullAry | ||
| TypeSignature::VariadicAny | ||
| TypeSignature::ArraySignature(_) | ||
|
@@ -412,6 +424,14 @@ impl Signature { | |
} | ||
} | ||
|
||
/// Creates a signature for a function that expects `arg_count` arguments of comparable data types. | ||
/// | ||
/// The resulting type signature is [`TypeSignature::Comparable`]; during type coercion all argument | ||
/// types are coerced into a single final type. | ||
pub fn comparable(arg_count: usize, volatility: Volatility) -> Self { | ||
Self { | ||
type_signature: TypeSignature::Comparable(arg_count), | ||
volatility, | ||
} | ||
} | ||
|
||
pub fn nullary(volatility: Volatility) -> Self { | ||
Signature { | ||
type_signature: TypeSignature::NullAry, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -29,7 +29,7 @@ use datafusion_common::{ | |
}; | ||
use datafusion_expr_common::{ | ||
signature::{ArrayFunctionSignature, FIXED_SIZE_LIST_WILDCARD, TIMEZONE_WILDCARD}, | ||
type_coercion::binary::string_coercion, | ||
type_coercion::binary::{comparison_coercion_numeric, string_coercion}, | ||
}; | ||
use std::sync::Arc; | ||
|
||
|
@@ -182,6 +182,7 @@ fn is_well_supported_signature(type_signature: &TypeSignature) -> bool { | |
| TypeSignature::Coercible(_) | ||
| TypeSignature::Any(_) | ||
| TypeSignature::NullAry | ||
| TypeSignature::Comparable(_) | ||
) | ||
} | ||
|
||
|
@@ -194,13 +195,18 @@ fn try_coerce_types( | |
|
||
// Well-supported signature that returns exact valid types. | ||
if !valid_types.is_empty() && is_well_supported_signature(type_signature) { | ||
// exact valid types | ||
assert_eq!(valid_types.len(), 1); | ||
// There may be many valid types if valid signature is OneOf | ||
// Otherwise, there should be only one valid type | ||
if !type_signature.is_one_of() { | ||
assert_eq!(valid_types.len(), 1); | ||
} | ||
|
||
let valid_types = valid_types.swap_remove(0); | ||
if let Some(t) = maybe_data_types_without_coercion(&valid_types, current_types) { | ||
return Ok(t); | ||
} | ||
} else { | ||
// TODO: Deprecate this branch after all signatures are well-supported (aka coercion has happened already) | ||
// Try and coerce the argument types to match the signature, returning the | ||
// coerced types from the first matching signature. | ||
for valid_types in valid_types { | ||
|
@@ -515,6 +521,23 @@ fn get_valid_types( | |
|
||
vec![vec![valid_type; *number]] | ||
} | ||
TypeSignature::Comparable(num) => { | ||
function_length_check(current_types.len(), *num)?; | ||
let mut target_type = current_types[0].to_owned(); | ||
for data_type in current_types.iter().skip(1) { | ||
if let Some(dt) = comparison_coercion_numeric(&target_type, data_type) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It seems like I think this is fine, but should probably be documented better / more explicitly as it might be expected Also the coercion / defaulting to |
||
target_type = dt; | ||
} else { | ||
return plan_err!("{target_type} and {data_type} is not comparable"); | ||
} | ||
} | ||
// Convert null to String type. | ||
if target_type.is_null() { | ||
vec![vec![DataType::Utf8View; *num]] | ||
} else { | ||
vec![vec![target_type; *num]] | ||
} | ||
} | ||
TypeSignature::Coercible(target_types) => { | ||
function_length_check(current_types.len(), target_types.len())?; | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -97,11 +97,54 @@ SELECT NULLIF(1, 3); | |
---- | ||
1 | ||
|
||
query I | ||
query T | ||
SELECT NULLIF(NULL, NULL); | ||
---- | ||
NULL | ||
|
||
query R | ||
select nullif(1, 1.2); | ||
---- | ||
1 | ||
|
||
query R | ||
select nullif(1.0, 2); | ||
---- | ||
1 | ||
|
||
query error DataFusion error: Arrow error: Cast error: Cannot cast string 'a' to value of Int64 type | ||
select nullif(2, 'a'); | ||
|
||
query T | ||
select nullif('2', '3'); | ||
---- | ||
2 | ||
|
||
query I | ||
select nullif(2, '1'); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This also used to work.
Interesting that the type is text vs number but still, it did work. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is quite a hard issue to fix, since we can't tell the difference between Unknown type and String type currently This query pass in
The change did have regression on There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Similar issue in #13240 (comment) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't have strong opinions against this PR because it fixes There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Tracked in #13285 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Duckdb can run this query
😕 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To me what is important is not strictly what other systems support - those may/could be a guide to what datafusion will support - but whether the provided arguments to a signature can be losslessly converted to what the signature accepts and whether it logically makes sense to do so. I personally would rather be lenient for what is accepted and do casting/coercion as required than to be strict and push the onus onto the user to do that. That's just me though, I don't know if that is the general consensus of the community. Perhaps we should file a discussion ticket with the options and decide? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I just double checked on this branch and it seems good to me (supports things reasonably) Running `target/debug/datafusion-cli`
DataFusion CLI v43.0.0
> select nullif(2, '1');
+----------------------------+
| nullif(Int64(2),Utf8("1")) |
+----------------------------+
| 2 |
+----------------------------+
1 row(s) fetched.
Elapsed 0.027 seconds.
> select nullif('1'::varchar, 2);
+----------------------------+
| nullif(Utf8("1"),Int64(2)) |
+----------------------------+
| 1 |
+----------------------------+
1 row(s) fetched.
Elapsed 0.007 seconds. |
||
---- | ||
2 | ||
|
||
query I | ||
select nullif('2', 2); | ||
---- | ||
NULL | ||
|
||
query I | ||
select nullif('1', 2); | ||
---- | ||
1 | ||
|
||
statement ok | ||
create table t(a varchar, b int) as values ('1', 2), ('2', 2), ('3', 2); | ||
|
||
query I | ||
select nullif(a, b) from t; | ||
---- | ||
1 | ||
NULL | ||
3 | ||
|
||
query T | ||
SELECT NULLIF(arrow_cast('a', 'Utf8View'), 'a'); | ||
---- | ||
|
@@ -130,4 +173,4 @@ NULL | |
query T | ||
SELECT NULLIF(arrow_cast('a', 'Utf8View'), null); | ||
---- | ||
a | ||
a |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would it be possible to define what `comparable` means in this context?