Skip to content

Commit

Permalink
Check for duplicated keys only within a single row
Browse files Browse the repository at this point in the history
  • Loading branch information
PHILO-HE committed Nov 10, 2023
1 parent c12d970 commit f435675
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 8 deletions.
4 changes: 2 additions & 2 deletions velox/docs/functions/spark/string.rst
Original file line number Diff line number Diff line change
Expand Up @@ -182,8 +182,8 @@ Unless specified otherwise, all functions return NULL if at least one of the arg
Returns a map by splitting ``string`` into entries with ``entryDelim`` and splitting
each entry into key/value with ``keyValueDelim``.
Only supports constant single-character `entryDelim` and `keyValueDelim`.
Does not allow duplicated map keys, consistent with Spark's default behavior. ::
Only supports constant single-character ``entryDelim`` and ``keyValueDelim``. Disallows
duplicate map keys within the result of each row, consistent with Spark's default behavior. ::

SELECT str_to_map('a:1,b:2,c:3', ',', ':'); -- {"a":"1","b":"2","c":"3"}

Expand Down
10 changes: 4 additions & 6 deletions velox/functions/sparksql/SplitFunctions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,8 @@ class StringToMap final : public exec::VectorFunction {
exec::VectorWriter<Map<Varchar, Varchar>> resultWriter;
resultWriter.init(*result->as<MapVector>());

folly::F14FastMap<StringView, vector_size_t> keyToIdx;
context.applyToSelectedNoThrow(rows, [&](vector_size_t row) {
folly::F14FastSet<StringView> keys;
resultWriter.setOffset(row);
auto& mapWriter = resultWriter.current();

Expand All @@ -168,12 +168,10 @@ class StringToMap final : public exec::VectorFunction {
entryEnd = std::find(pos, end, entryDelim_);
keyEnd = std::find(pos, entryEnd, keyValueDelim_);
const auto key = StringView(pos, keyEnd - pos);
const auto iter = keyToIdx.find(key);
VELOX_USER_CHECK(
iter == keyToIdx.end(),
keys.insert(key).second,
"Duplicated keys ('{}') are not allowed.",
key);
keyToIdx.emplace(key, mapWriter.size());
if (keyEnd == entryEnd) {
mapWriter.add_null().append(key);
} else {
Expand Down Expand Up @@ -212,14 +210,14 @@ std::shared_ptr<exec::VectorFunction> createStringToMap(
const std::vector<exec::VectorFunctionArg>& inputArgs,
const core::QueryConfig& /*config*/) {
VELOX_USER_CHECK_EQ(
inputArgs.size(), 3, "Expects 3 arguments for StringToMap function.");
inputArgs.size(), 3, "Expects 3 arguments for StringToMap.");

auto getDelimiter =
[](exec::VectorFunctionArg inputArg) -> std::optional<char> {
BaseVector* constantVector = inputArg.constantValue.get();
VELOX_USER_CHECK_NOT_NULL(
constantVector,
"StringToMap function requires constant entry/key-value delimiter.");
"StringToMap requires constant entry/key-value delimiter.");
const auto constantStringView =
constantVector->as<ConstantVector<StringView>>();
if (constantStringView->isNullAt(0)) {
Expand Down

0 comments on commit f435675

Please sign in to comment.