From 465baf439f08bd1e6e2ef24c4c68f3990e3bcfd2 Mon Sep 17 00:00:00 2001 From: "wangguangxin.cn" Date: Mon, 4 Mar 2024 23:26:07 +0800 Subject: [PATCH] address comments --- .../substrait/VeloxSubstraitSignature.cc | 92 ++++++++----------- 1 file changed, 37 insertions(+), 55 deletions(-) diff --git a/cpp/velox/substrait/VeloxSubstraitSignature.cc b/cpp/velox/substrait/VeloxSubstraitSignature.cc index c05b0ba6a7b4d..a7d59c20cc96b 100644 --- a/cpp/velox/substrait/VeloxSubstraitSignature.cc +++ b/cpp/velox/substrait/VeloxSubstraitSignature.cc @@ -121,29 +121,20 @@ TypePtr VeloxSubstraitSignature::fromSubstraitSignature(const std::string& signa return str.size() >= prefix.size() && str.substr(0, prefix.size()) == prefix; }; - if (startWith(signature, "dec")) { - // Decimal type name is in the format of dec. - auto precisionStart = signature.find_first_of('<'); - auto tokenIndex = signature.find_first_of(','); - auto scaleEnd = signature.find_first_of('>'); - auto precision = stoi(signature.substr(precisionStart + 1, (tokenIndex - precisionStart - 1))); - auto scale = stoi(signature.substr(tokenIndex + 1, (scaleEnd - tokenIndex - 1))); - return DECIMAL(precision, scale); - } - - if (startWith(signature, "struct")) { - // Struct type name is in the format of struct. - auto structStart = signature.find_first_of('<'); - auto structEnd = signature.find_last_of('>'); + auto parseNestedTypeSignature = [&](const std::string& signature) -> std::vector { + auto start = signature.find_first_of('<'); + auto end = signature.find_last_of('>'); VELOX_CHECK( - structEnd - structStart > 1, "Native validation failed due to: more information is needed to create RowType"); - std::string childrenTypes = signature.substr(structStart + 1, structEnd - structStart - 1); + end - start > 1, + "Native validation failed due to: more information is needed to create nested type for {}", + signature); + + std::string childrenTypes = signature.substr(start + 1, end - start - 1); // Split the types with delimiter. std::string delimiter = ","; std::size_t pos; std::vector types; - std::vector names; while ((pos = childrenTypes.find(delimiter)) != std::string::npos) { auto typeStr = childrenTypes.substr(0, pos); std::size_t endPos = pos; @@ -160,16 +151,44 @@ TypePtr VeloxSubstraitSignature::fromSubstraitSignature(const std::string& signa } } types.emplace_back(fromSubstraitSignature(typeStr)); - names.emplace_back(""); childrenTypes.erase(0, endPos + delimiter.length()); } if (childrenTypes.size() > 0 && !startWith(childrenTypes, ">")) { types.emplace_back(fromSubstraitSignature(childrenTypes)); + } + return types; + }; + + if (startWith(signature, "dec")) { + // Decimal type name is in the format of dec. + auto precisionStart = signature.find_first_of('<'); + auto tokenIndex = signature.find_first_of(','); + auto scaleEnd = signature.find_first_of('>'); + auto precision = stoi(signature.substr(precisionStart + 1, (tokenIndex - precisionStart - 1))); + auto scale = stoi(signature.substr(tokenIndex + 1, (scaleEnd - tokenIndex - 1))); + return DECIMAL(precision, scale); + } + + if (startWith(signature, "struct")) { + // Struct type name is in the format of struct. + auto types = parseNestedTypeSignature(signature); + std::vector names; + names.resize(types.size()); + for (int i = 0; i < types.size(); i++) { names.emplace_back(""); } return std::make_shared(std::move(names), std::move(types)); } + if (startWith(signature, "map")) { + // Map type name is in the format of map. + auto types = parseNestedTypeSignature(signature); + if (types.size() != 2) { + VELOX_UNSUPPORTED("Substrait type signature conversion to Velox type not supported for {}.", signature); + } + return MAP(std::move(types)[0], std::move(types)[1]); + } + if (startWith(signature, "list")) { auto listStart = signature.find_first_of('<'); auto listEnd = signature.find_last_of('>'); @@ -183,43 +202,6 @@ TypePtr VeloxSubstraitSignature::fromSubstraitSignature(const std::string& signa return ARRAY(elementType); } - if (startWith(signature, "map")) { - // Map type name is in the format of map. - auto mapStart = signature.find_first_of('<'); - auto mapEnd = signature.find_last_of('>'); - VELOX_CHECK( - mapEnd - mapStart > 1, - "Native validation failed due to: more information is needed to create MapType: {}", - signature); - std::string childrenTypes = signature.substr(mapStart + 1, mapEnd - mapStart - 1); - std::string delimiter = ","; - std::size_t pos; - std::vector types; - while ((pos = childrenTypes.find(delimiter)) != std::string::npos) { - auto typeStr = childrenTypes.substr(0, pos); - std::size_t endPos = pos; - if (startWith(typeStr, "dec") || startWith(typeStr, "struct") || startWith(typeStr, "map") || - startWith(typeStr, "list")) { - endPos = childrenTypes.find(">") + 1; - if (endPos > pos) { - typeStr += childrenTypes.substr(pos, endPos - pos); - } else { - typeStr += childrenTypes.substr(pos); - endPos = childrenTypes.size(); - } - } - types.emplace_back(fromSubstraitSignature(typeStr)); - childrenTypes.erase(0, endPos + delimiter.length()); - } - if (childrenTypes.size() > 0 && !startWith(childrenTypes, ">")) { - types.emplace_back(fromSubstraitSignature(childrenTypes)); - } - if (types.size() != 2) { - VELOX_UNSUPPORTED("Substrait type signature conversion to Velox type not supported for {}.", signature); - } - return MAP(std::move(types)[0], std::move(types)[1]); - } - VELOX_UNSUPPORTED("Substrait type signature conversion to Velox type not supported for {}.", signature); }