Skip to content

Commit

Permalink
address comments
Browse files Browse the repository at this point in the history
  • Loading branch information
yma11 committed May 16, 2024
1 parent 7f2d605 commit 1a394a7
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 32 deletions.
17 changes: 8 additions & 9 deletions velox/docs/functions/spark/map.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,14 @@ Map Functions

SELECT map_from_arrays(array(1.0, 3.0), array('2', '4')); -- {1.0 -> 2, 3.0 -> 4}

.. spark:function:: map_from_entries(array(struct(K,V))) -> map(K,V)
Returns a map created from the given array of entries. Throws an exception if there are duplicate keys, or if a key is null or contains null.
If the array contains a null entry, returns null for the whole array. ::

SELECT map_from_entries(array(struct(1, 'a'), struct(2, 'null'))); -- {1 -> 'a', 2 -> 'null'}
SELECT map_from_entries(array(struct(1, 'a'), null)); -- {null}

.. spark:function:: map_keys(x(K,V)) -> array(K)
Returns all the keys in the map ``x``.
Expand All @@ -41,15 +49,6 @@ Map Functions
Returns all the values in the map ``x``.

.. spark:function:: map_from_entries(array(struct(K,V))) -> map(K,V)
Returns a map created from the given array of entries. Exceptions will be thrown if key is null or contains null.
If null entry exists in the array, return null for this whole array.::

SELECT map_from_entries(array(struct(1, 'a'), struct(2, 'null'))); -- {1 -> 'a', 2 -> 'null'}
SELECT map_from_entries(array(struct(1, 'a'), null)); -- {null}


.. spark:function:: size(map(K,V)) -> bigint
:noindex:

Expand Down
34 changes: 16 additions & 18 deletions velox/functions/lib/MapFromEntries.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,12 @@ static const char* kIndeterminateKeyErrorMessage =
"map key cannot be indeterminate";
static const char* kErrorMessageEntryNotNull = "map entry cannot be null";

/// @tparam throwForNull If true, will return null if input array is null or has
/// null entries (Spark's behavior), instead of throwing exceptions (Presto's
/// behavior).
template <bool throwForNull>
class MapFromEntriesFunction : public exec::VectorFunction {
public:
// If throwOnNull is true, throws an exception when the input array has a
// null entry (Presto's behavior). If false, returns null for an input array
// that is null or has null entries (Spark's behavior).
MapFromEntriesFunction(const bool throwOnNull) : throwOnNull_(throwOnNull) {}
void apply(
const SelectivityVector& rows,
std::vector<VectorPtr>& args,
Expand Down Expand Up @@ -98,7 +98,7 @@ class MapFromEntriesFunction : public exec::VectorFunction {
exec::LocalDecodedVector decodedRowVector(context);
decodedRowVector.get()->decode(*inputValueVector);
if (inputValueVector->typeKind() == TypeKind::UNKNOWN) {
if constexpr (throwForNull) {
if (throwOnNull_) {
try {
VELOX_USER_FAIL(kErrorMessageEntryNotNull);
} catch (...) {
Expand Down Expand Up @@ -145,7 +145,7 @@ class MapFromEntriesFunction : public exec::VectorFunction {
// Check nulls in the top level row vector.
const bool isMapEntryNull = decodedRowVector->isNullAt(offset + i);
if (isMapEntryNull) {
if constexpr (!throwForNull) {
if (!throwOnNull_) {
bits::setNull(mutableNulls, row);
resetSize(row);
break;
Expand Down Expand Up @@ -227,8 +227,9 @@ class MapFromEntriesFunction : public exec::VectorFunction {
}

// For Presto, need construct map vector based on input nulls for possible
// outer expression like try(). For Spark, use the updated nulls unless it's empty.
if constexpr (throwForNull) {
// outer expression like try(). For Spark, use the updated nulls unless it's
// empty.
if (throwOnNull_) {
nulls = inputArray->nulls();
} else {
if (decodedRowVector->size() == 0) {
Expand All @@ -248,20 +249,17 @@ class MapFromEntriesFunction : public exec::VectorFunction {
checkDuplicateKeys(mapVector, *remianingRows, context);
return mapVector;
}

bool throwOnNull_;
};
} // namespace

void registerMapFromEntriesThrowForNullFunction(const std::string& name) {
exec::registerVectorFunction(
name,
MapFromEntriesFunction</*ThrowForNull=*/true>::signatures(),
std::make_unique<MapFromEntriesFunction</*ThrowForNull=*/true>>());
}

void registerMapFromEntriesFunction(const std::string& name) {
void registerMapFromEntriesFunction(
const std::string& name,
const bool throwOnNull) {
exec::registerVectorFunction(
name,
MapFromEntriesFunction</*ThrowForNull=*/false>::signatures(),
std::make_unique<MapFromEntriesFunction</*ThrowForNull=*/false>>());
MapFromEntriesFunction::signatures(),
std::make_unique<MapFromEntriesFunction>(throwOnNull));
}
} // namespace facebook::velox::functions
6 changes: 3 additions & 3 deletions velox/functions/lib/MapFromEntries.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@

namespace facebook::velox::functions {

void registerMapFromEntriesThrowForNullFunction(const std::string& name);

void registerMapFromEntriesFunction(const std::string& name);
/// Registers the map-from-entries vector function under the given name.
///
/// @param name Name the function is registered under (callers pass
/// prefix + "map_from_entries").
/// @param throwOnNull If true, a null map entry raises a user error
/// ("map entry cannot be null"); if false, a null entry makes the result for
/// that row null instead. Named to match the definition and the
/// `throwOnNull_` member it initializes.
void registerMapFromEntriesFunction(
    const std::string& name,
    const bool throwOnNull);

} // namespace facebook::velox::functions
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ void registerMapFunctions(const std::string& prefix) {
udf_transform_values, prefix + "transform_values");
VELOX_REGISTER_VECTOR_FUNCTION(udf_map, prefix + "map");
VELOX_REGISTER_VECTOR_FUNCTION(udf_map_entries, prefix + "map_entries");
registerMapFromEntriesThrowForNullFunction(prefix + "map_from_entries");
registerMapFromEntriesFunction(prefix + "map_from_entries", true);

VELOX_REGISTER_VECTOR_FUNCTION(udf_map_keys, prefix + "map_keys");
VELOX_REGISTER_VECTOR_FUNCTION(udf_map_values, prefix + "map_values");
Expand Down
2 changes: 1 addition & 1 deletion velox/functions/sparksql/Register.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ static void workAroundRegistrationMacro(const std::string& prefix) {

VELOX_REGISTER_VECTOR_FUNCTION(
udf_map_allow_duplicates, prefix + "map_from_arrays");
registerMapFromEntriesFunction(prefix + "map_from_entries");
registerMapFromEntriesFunction(prefix + "map_from_entries", false);
VELOX_REGISTER_VECTOR_FUNCTION(
udf_concat_row, exec::RowConstructorCallToSpecialForm::kRowConstructor);
// String functions.
Expand Down

0 comments on commit 1a394a7

Please sign in to comment.