Skip to content

Commit

Permalink
Merge remote-tracking branch 'apache/main' into alamb/consolidate_to_…
Browse files Browse the repository at this point in the history
…date
  • Loading branch information
alamb committed Dec 30, 2024
2 parents e06d83f + 4d07579 commit ffc7941
Show file tree
Hide file tree
Showing 27 changed files with 918 additions and 1,232 deletions.
3 changes: 3 additions & 0 deletions benchmarks/src/clickbench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,9 @@ impl RunOpt {
);
benchmark_run.write_iter(elapsed, row_count);
}
if self.common.debug {
ctx.sql(sql).await?.explain(false, false)?.show().await?;
}
}
benchmark_run.maybe_write_json(self.output_path.as_ref())?;
Ok(())
Expand Down
49 changes: 20 additions & 29 deletions datafusion/functions-nested/src/flatten.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,13 @@ use datafusion_common::cast::{
as_generic_list_array, as_large_list_array, as_list_array,
};
use datafusion_common::{exec_err, Result};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY;
use datafusion_expr::{
ArrayFunctionSignature, ColumnarValue, Documentation, ScalarUDFImpl, Signature,
TypeSignature, Volatility,
};
use datafusion_macros::user_doc;
use std::any::Any;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

make_udf_expr_and_func!(
Flatten,
Expand All @@ -42,6 +42,23 @@ make_udf_expr_and_func!(
flatten_udf
);

#[user_doc(
doc_section(label = "Array Functions"),
description = "Converts an array of arrays to a flat array.\n\n- Applies to any depth of nested arrays\n- Does not change arrays that are already flat\n\nThe flattened array contains all the elements from all source arrays.",
syntax_example = "flatten(array)",
sql_example = r#"```sql
> select flatten([[1, 2], [3, 4]]);
+------------------------------+
| flatten(List([1,2], [3,4])) |
+------------------------------+
| [1, 2, 3, 4] |
+------------------------------+
```"#,
argument(
name = "array",
description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
)
)]
#[derive(Debug)]
pub struct Flatten {
signature: Signature,
Expand Down Expand Up @@ -118,35 +135,9 @@ impl ScalarUDFImpl for Flatten {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_flatten_doc())
self.doc()
}
}
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

fn get_flatten_doc() -> &'static Documentation {
DOCUMENTATION.get_or_init(|| {
Documentation::builder(
DOC_SECTION_ARRAY,
"Converts an array of arrays to a flat array.\n\n- Applies to any depth of nested arrays\n- Does not change arrays that are already flat\n\nThe flattened array contains all the elements from all source arrays.",

"flatten(array)")
.with_sql_example(
r#"```sql
> select flatten([[1, 2], [3, 4]]);
+------------------------------+
| flatten(List([1,2], [3,4])) |
+------------------------------+
| [1, 2, 3, 4] |
+------------------------------+
```"#,
)
.with_argument(
"array",
"Array expression. Can be a constant, column, or function, and any combination of array operators.",
)
.build()
})
}

/// Flatten SQL function
pub fn flatten_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
Expand Down
55 changes: 21 additions & 34 deletions datafusion/functions-nested/src/length.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,13 @@ use arrow_schema::DataType;
use arrow_schema::DataType::{FixedSizeList, LargeList, List, UInt64};
use datafusion_common::cast::{as_generic_list_array, as_int64_array};
use datafusion_common::{exec_err, internal_datafusion_err, plan_err, Result};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
use datafusion_functions::{downcast_arg, downcast_named_arg};
use datafusion_macros::user_doc;
use std::any::Any;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

make_udf_expr_and_func!(
ArrayLength,
Expand All @@ -41,6 +41,24 @@ make_udf_expr_and_func!(
array_length_udf
);

#[user_doc(
doc_section(label = "Array Functions"),
description = "Returns the length of the array dimension.",
syntax_example = "array_length(array, dimension)",
sql_example = r#"```sql
> select array_length([1, 2, 3, 4, 5], 1);
+-------------------------------------------+
| array_length(List([1,2,3,4,5]), 1) |
+-------------------------------------------+
| 5 |
+-------------------------------------------+
```"#,
argument(
name = "array",
description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
),
argument(name = "dimension", description = "Array dimension.")
)]
#[derive(Debug)]
pub struct ArrayLength {
signature: Signature,
Expand Down Expand Up @@ -96,41 +114,10 @@ impl ScalarUDFImpl for ArrayLength {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_array_length_doc())
self.doc()
}
}

static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

fn get_array_length_doc() -> &'static Documentation {
DOCUMENTATION.get_or_init(|| {
Documentation::builder(
DOC_SECTION_ARRAY,
"Returns the length of the array dimension.",

"array_length(array, dimension)")
.with_sql_example(
r#"```sql
> select array_length([1, 2, 3, 4, 5], 1);
+-------------------------------------------+
| array_length(List([1,2,3,4,5]), 1) |
+-------------------------------------------+
| 5 |
+-------------------------------------------+
```"#,
)
.with_argument(
"array",
"Array expression. Can be a constant, column, or function, and any combination of array operators.",
)
.with_argument(
"dimension",
"Array dimension.",
)
.build()
})
}

/// Array_length SQL function
pub fn array_length_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
if args.len() != 1 && args.len() != 2 {
Expand Down
53 changes: 21 additions & 32 deletions datafusion/functions-nested/src/make_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,10 @@
//! [`ScalarUDFImpl`] definitions for `make_array` function.
use std::any::Any;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;
use std::vec;

use crate::utils::make_scalar_function;
use arrow::array::{ArrayData, Capacities, MutableArrayData};
use arrow_array::{
new_null_array, Array, ArrayRef, GenericListArray, NullArray, OffsetSizeTrait,
Expand All @@ -33,13 +34,11 @@ use datafusion_common::{plan_err, Result};
use datafusion_expr::binary::{
try_type_union_resolution_with_struct, type_union_resolution,
};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_ARRAY;
use datafusion_expr::TypeSignature;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};

use crate::utils::make_scalar_function;
use datafusion_macros::user_doc;

make_udf_expr_and_func!(
MakeArray,
Expand All @@ -48,6 +47,23 @@ make_udf_expr_and_func!(
make_array_udf
);

#[user_doc(
doc_section(label = "Array Functions"),
description = "Returns an array using the specified input expressions.",
syntax_example = "make_array(expression1[, ..., expression_n])",
sql_example = r#"```sql
> select make_array(1, 2, 3, 4, 5);
+----------------------------------------------------------+
| make_array(Int64(1),Int64(2),Int64(3),Int64(4),Int64(5)) |
+----------------------------------------------------------+
| [1, 2, 3, 4, 5] |
+----------------------------------------------------------+
```"#,
argument(
name = "expression_n",
description = "Expression to include in the output array. Can be a constant, column, or function, and any combination of arithmetic or string operators."
)
)]
#[derive(Debug)]
pub struct MakeArray {
signature: Signature,
Expand Down Expand Up @@ -139,37 +155,10 @@ impl ScalarUDFImpl for MakeArray {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_make_array_doc())
self.doc()
}
}

static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

fn get_make_array_doc() -> &'static Documentation {
DOCUMENTATION.get_or_init(|| {
Documentation::builder(
DOC_SECTION_ARRAY,
"Returns an array using the specified input expressions.",

"make_array(expression1[, ..., expression_n])")
.with_sql_example(
r#"```sql
> select make_array(1, 2, 3, 4, 5);
+----------------------------------------------------------+
| make_array(Int64(1),Int64(2),Int64(3),Int64(4),Int64(5)) |
+----------------------------------------------------------+
| [1, 2, 3, 4, 5] |
+----------------------------------------------------------+
```"#,
)
.with_argument(
"expression_n",
"Expression to include in the output array. Can be a constant, column, or function, and any combination of arithmetic or string operators.",
)
.build()
})
}

// Empty array is a special case that is useful for many other array functions
pub(super) fn empty_array_type() -> DataType {
List(Arc::new(Field::new_list_field(DataType::Int64, true)))
Expand Down
105 changes: 47 additions & 58 deletions datafusion/functions-nested/src/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

use std::any::Any;
use std::collections::VecDeque;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

use arrow::array::ArrayData;
use arrow_array::{Array, ArrayRef, MapArray, OffsetSizeTrait, StructArray};
Expand All @@ -27,10 +27,10 @@ use arrow_schema::{DataType, Field, SchemaBuilder};
use datafusion_common::utils::{fixed_size_list_to_arrays, list_to_arrays};
use datafusion_common::{exec_err, HashSet, Result, ScalarValue};
use datafusion_expr::expr::ScalarFunction;
use datafusion_expr::scalar_doc_sections::DOC_SECTION_MAP;
use datafusion_expr::{
ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility,
};
use datafusion_macros::user_doc;

use crate::make_array::make_array;

Expand Down Expand Up @@ -181,6 +181,50 @@ fn make_map_batch_internal(
})
}

#[user_doc(
doc_section(label = "Map Functions"),
description = "Returns an Arrow map with the specified key-value pairs.\n\n\
The `make_map` function creates a map from two lists: one for keys and one for values. Each key must be unique and non-null.",
syntax_example = "map(key, value)\nmap(key: value)\nmake_map(['key1', 'key2'], ['value1', 'value2'])",
sql_example = r#"
```sql
-- Using map function
SELECT MAP('type', 'test');
----
{type: test}
SELECT MAP(['POST', 'HEAD', 'PATCH'], [41, 33, null]);
----
{POST: 41, HEAD: 33, PATCH: }
SELECT MAP([[1,2], [3,4]], ['a', 'b']);
----
{[1, 2]: a, [3, 4]: b}
SELECT MAP { 'a': 1, 'b': 2 };
----
{a: 1, b: 2}
-- Using make_map function
SELECT MAKE_MAP(['POST', 'HEAD'], [41, 33]);
----
{POST: 41, HEAD: 33}
SELECT MAKE_MAP(['key1', 'key2'], ['value1', null]);
----
{key1: value1, key2: }
```"#,
argument(
name = "key",
description = "For `map`: Expression to be used for key. Can be a constant, column, function, or any combination of arithmetic or string operators.\n\
For `make_map`: The list of keys to be used in the map. Each key must be unique and non-null."
),
argument(
name = "value",
description = "For `map`: Expression to be used for value. Can be a constant, column, function, or any combination of arithmetic or string operators.\n\
For `make_map`: The list of values to be mapped to the corresponding keys."
)
)]
#[derive(Debug)]
pub struct MapFunc {
signature: Signature,
Expand Down Expand Up @@ -247,65 +291,10 @@ impl ScalarUDFImpl for MapFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_map_doc())
self.doc()
}
}

static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

fn get_map_doc() -> &'static Documentation {
DOCUMENTATION.get_or_init(|| {
Documentation::builder(
DOC_SECTION_MAP,
"Returns an Arrow map with the specified key-value pairs.\n\n\
The `make_map` function creates a map from two lists: one for keys and one for values. Each key must be unique and non-null.",

"map(key, value)\nmap(key: value)\nmake_map(['key1', 'key2'], ['value1', 'value2'])"
)
.with_sql_example(
r#"
```sql
-- Using map function
SELECT MAP('type', 'test');
----
{type: test}
SELECT MAP(['POST', 'HEAD', 'PATCH'], [41, 33, null]);
----
{POST: 41, HEAD: 33, PATCH: }
SELECT MAP([[1,2], [3,4]], ['a', 'b']);
----
{[1, 2]: a, [3, 4]: b}
SELECT MAP { 'a': 1, 'b': 2 };
----
{a: 1, b: 2}
-- Using make_map function
SELECT MAKE_MAP(['POST', 'HEAD'], [41, 33]);
----
{POST: 41, HEAD: 33}
SELECT MAKE_MAP(['key1', 'key2'], ['value1', null]);
----
{key1: value1, key2: }
```"#,
)
.with_argument(
"key",
"For `map`: Expression to be used for key. Can be a constant, column, function, or any combination of arithmetic or string operators.\n\
For `make_map`: The list of keys to be used in the map. Each key must be unique and non-null."
)
.with_argument(
"value",
"For `map`: Expression to be used for value. Can be a constant, column, function, or any combination of arithmetic or string operators.\n\
For `make_map`: The list of values to be mapped to the corresponding keys."
)
.build()
})
}

fn get_element_type(data_type: &DataType) -> Result<&DataType> {
match data_type {
DataType::List(element) => Ok(element.data_type()),
Expand Down
Loading

0 comments on commit ffc7941

Please sign in to comment.