Skip to content

Commit

Permalink
Fix review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
PHILO-HE committed Dec 6, 2023
1 parent 3d08dd7 commit e0487e3
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 19 deletions.
4 changes: 2 additions & 2 deletions velox/docs/functions/spark/string.rst
Original file line number Diff line number Diff line change
Expand Up @@ -187,8 +187,8 @@ Unless specified otherwise, all functions return NULL if at least one of the arg
Spark's default behavior. ::

SELECT str_to_map('a:1,b:2,c:3', ',', ':'); -- {"a":"1","b":"2","c":"3"}
SELECT str_to_map('a', ',', ':'); -- {'a':NULL}
SELECT str_to_map('', ',', ':'); -- {'':NULL}
SELECT str_to_map('a', ',', ':'); -- {"a":NULL}
SELECT str_to_map('', ',', ':'); -- {"":NULL}
SELECT str_to_map('a:1,b:2,c:3', ',', ','); -- {"a:1":NULL,"b:2":NULL,"c:3":NULL}

.. spark:function:: substring(string, start) -> varchar
Expand Down
11 changes: 5 additions & 6 deletions velox/functions/sparksql/StringToMap.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@

namespace facebook::velox::functions::sparksql {

template <typename TExecCtx>
template <typename T>
struct StringToMapFunction {
VELOX_DEFINE_FUNCTION_TYPES(TExecCtx);
VELOX_DEFINE_FUNCTION_TYPES(T);

// Results refer to strings in the first argument.
static constexpr int32_t reuse_strings_from_arg = 0;
Expand All @@ -32,8 +32,8 @@ struct StringToMapFunction {
const arg_type<Varchar>& input,
const arg_type<Varchar>& entryDelimiter,
const arg_type<Varchar>& keyValueDelimiter) {
VELOX_USER_CHECK(!entryDelimiter.empty(), "entryDelimiter is empty");
VELOX_USER_CHECK(!keyValueDelimiter.empty(), "keyValueDelimiter is empty");
VELOX_USER_CHECK(!entryDelimiter.empty(), "entryDelimiter is empty.");
VELOX_USER_CHECK(!keyValueDelimiter.empty(), "keyValueDelimiter is empty.");

callImpl(
out,
Expand All @@ -53,7 +53,6 @@ struct StringToMapFunction {
std::string_view entryDelimiter,
std::string_view keyValueDelimiter) const {
size_t pos = 0;

folly::F14FastSet<std::string_view> keys;

auto nextEntryPos = input.find(entryDelimiter, pos);
Expand Down Expand Up @@ -81,7 +80,7 @@ struct StringToMapFunction {
std::string_view keyValueDelimiter,
folly::F14FastSet<std::string_view>& keys) const {
const auto delimiterPos = entry.find(keyValueDelimiter, 0);
// Not found key/value delimiter.
// Allows keyValue delimiter not found.
if (delimiterPos == std::string::npos) {
out.add_null().setNoCopy(StringView(entry));
return;
Expand Down
31 changes: 20 additions & 11 deletions velox/functions/sparksql/tests/StringToMapTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,52 +15,61 @@
*/
#include "velox/common/base/tests/GTestUtils.h"
#include "velox/functions/sparksql/tests/SparkFunctionBaseTest.h"
#include "velox/type/Type.h"

namespace facebook::velox::functions::sparksql::test {
using namespace facebook::velox::test;

namespace facebook::velox::functions::sparksql::test {
namespace {
class StringToMapTest : public SparkFunctionBaseTest {
protected:
VectorPtr evaluateStringToMap(const std::vector<StringView>& inputs) {
const std::string expr =
fmt::format("str_to_map(c0, '{}', '{}')", inputs[1], inputs[2]);
return evaluate<MapVector>(
expr, makeRowVector({makeFlatVector<StringView>({inputs[0]})}));
}

void testStringToMap(
const std::vector<StringView>& inputs,
const std::vector<std::pair<StringView, std::optional<StringView>>>&
expect) {
std::vector<VectorPtr> row;
row.emplace_back(makeFlatVector<StringView>({inputs[0]}));
std::string expr =
fmt::format("str_to_map(c0, '{}', '{}')", inputs[1], inputs[2]);
auto result = evaluate<MapVector>(expr, makeRowVector(row));
auto expected = makeMapVector<StringView, StringView>({expect});
assertEqualVectors(result, expected);
auto result = evaluateStringToMap(inputs);
auto expectVector = makeMapVector<StringView, StringView>({expect});
assertEqualVectors(result, expectVector);
}
};

TEST_F(StringToMapTest, Basics) {
testStringToMap(
{"a:1,b:2,c:3", ",", ":"}, {{"a", "1"}, {"b", "2"}, {"c", "3"}});
testStringToMap({"a: ,b:2", ",", ":"}, {{"a", " "}, {"b", "2"}});
testStringToMap({"a:,b:2", ",", ":"}, {{"a", ""}, {"b", "2"}});
testStringToMap({"", ",", ":"}, {{"", std::nullopt}});
testStringToMap({"a", ",", ":"}, {{"a", std::nullopt}});
testStringToMap(
{"a=1,b=2,c=3", ",", "="}, {{"a", "1"}, {"b", "2"}, {"c", "3"}});
testStringToMap({"", ",", "="}, {{"", std::nullopt}});
testStringToMap(
{"a::1,b::2,c::3", ",", "c"},
{{"", "::3"}, {"a::1", std::nullopt}, {"b::2", std::nullopt}});
{{"a::1", std::nullopt}, {"b::2", std::nullopt}, {"", "::3"}});
testStringToMap(
{"a:1_b:2_c:3", "_", ":"}, {{"a", "1"}, {"b", "2"}, {"c", "3"}});

// Same delimiters.
testStringToMap(
{"a:1,b:2,c:3", ",", ","},
{{"a:1", std::nullopt}, {"b:2", std::nullopt}, {"c:3", std::nullopt}});
testStringToMap(
{"a:1_b:2_c:3", "_", "_"},
{{"a:1", std::nullopt}, {"b:2", std::nullopt}, {"c:3", std::nullopt}});

// Exception for duplicated keys.
VELOX_ASSERT_THROW(
testStringToMap({"a:1,b:2,a:3", ",", ":"}, {{"a", "3"}, {"b", "2"}}),
evaluateStringToMap({"a:1,b:2,a:3", ",", ":"}),
"Duplicate keys are not allowed: ('a').");
VELOX_ASSERT_THROW(
evaluateStringToMap({":1,:2", ",", ":"}),
"Duplicate keys are not allowed: ('').");
}
} // namespace
} // namespace facebook::velox::functions::sparksql::test

0 comments on commit e0487e3

Please sign in to comment.