diff --git a/velox/docs/functions/spark/string.rst b/velox/docs/functions/spark/string.rst index a973ecb13bb28..e89246fd7af59 100644 --- a/velox/docs/functions/spark/string.rst +++ b/velox/docs/functions/spark/string.rst @@ -182,8 +182,8 @@ Unless specified otherwise, all functions return NULL if at least one of the arg Returns a map by splitting ``string`` into entries with ``entryDelim`` and splitting each entry into key/value with ``keyValueDelim``. - Only supports constant single-character `entryDelim` and `keyValueDelim`. - Does not allow duplicated map keys, consistent with Spark's default behavior. :: + Only supports constant single-character ``entryDelim`` and ``keyValueDelim``. Disallows + duplicated map keys in result of each row, consistent with Spark's default behavior. :: SELECT str_to_map('a:1,b:2,c:3', ',', ':'); -- {"a":"1","b":"2","c":"3"} diff --git a/velox/functions/sparksql/SplitFunctions.cpp b/velox/functions/sparksql/SplitFunctions.cpp index cff58b5ba0c4e..d30ffed81ab66 100644 --- a/velox/functions/sparksql/SplitFunctions.cpp +++ b/velox/functions/sparksql/SplitFunctions.cpp @@ -154,8 +154,8 @@ class StringToMap final : public exec::VectorFunction { exec::VectorWriter> resultWriter; resultWriter.init(*result->as()); - folly::F14FastMap keyToIdx; context.applyToSelectedNoThrow(rows, [&](vector_size_t row) { + folly::F14FastSet keys; resultWriter.setOffset(row); auto& mapWriter = resultWriter.current(); @@ -168,12 +168,10 @@ class StringToMap final : public exec::VectorFunction { entryEnd = std::find(pos, end, entryDelim_); keyEnd = std::find(pos, entryEnd, keyValueDelim_); const auto key = StringView(pos, keyEnd - pos); - const auto iter = keyToIdx.find(key); VELOX_USER_CHECK( - iter == keyToIdx.end(), + keys.insert(key).second, "Duplicated keys ('{}') are not allowed.", key); - keyToIdx.emplace(key, mapWriter.size()); if (keyEnd == entryEnd) { mapWriter.add_null().append(key); } else { @@ -212,14 +210,14 @@ std::shared_ptr createStringToMap( const std::vector& inputArgs, const core::QueryConfig& /*config*/) { VELOX_USER_CHECK_EQ( - inputArgs.size(), 3, "Expects 3 arguments for StringToMap function."); + inputArgs.size(), 3, "Expects 3 arguments for StringToMap."); auto getDelimiter = [](exec::VectorFunctionArg inputArg) -> std::optional { BaseVector* constantVector = inputArg.constantValue.get(); VELOX_USER_CHECK_NOT_NULL( constantVector, - "StringToMap function requires constant entry/key-value delimiter."); + "StringToMap requires constant entry/key-value delimiter."); const auto constantStringView = constantVector->as>(); if (constantStringView->isNullAt(0)) {