From 1e2258ac62cc7d2e6a1dd208c687e5283fe8c88d Mon Sep 17 00:00:00 2001 From: Jimmy Lu Date: Tue, 5 Dec 2023 07:15:17 -0800 Subject: [PATCH] Copy input in json_parse to avoid ASAN error (#7858) Summary: `value` is put on offset 32 of the stack frame, and the total frame size is 88. In case `value` is inlined, and SIMD register is 64 bytes (e.g. AVX512), simdjson is reading the memory from offset 36 to 100, which exceeds the frame boundary. This caused some ASAN error, but no issue in production, because the padding data is not really used or changed. Fix it by asking `simdjson::dom::parser::parse` to copy the input to internal padded memory owned by parser. There is some minor performance loss but is probably not noticeable at query level. Reviewed By: mbasmanova Differential Revision: D51810542 --- velox/dwio/common/SelectiveRepeatedColumnReader.cpp | 2 ++ velox/functions/prestosql/JsonFunctions.cpp | 7 ++++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/velox/dwio/common/SelectiveRepeatedColumnReader.cpp b/velox/dwio/common/SelectiveRepeatedColumnReader.cpp index 5ff6d800506c6..04c9e5baa1fa6 100644 --- a/velox/dwio/common/SelectiveRepeatedColumnReader.cpp +++ b/velox/dwio/common/SelectiveRepeatedColumnReader.cpp @@ -318,7 +318,9 @@ void SelectiveMapColumnReader::read( makeNestedRowSet(activeRows, rows.back()); if (keyReader_ && elementReader_ && !nestedRows_.empty()) { keyReader_->read(keyReader_->readOffset(), nestedRows_, nullptr); + LOG(INFO) << nestedRows_.size(); nestedRows_ = keyReader_->outputRows(); + LOG(INFO) << nestedRows_.size(); if (!nestedRows_.empty()) { elementReader_->read(elementReader_->readOffset(), nestedRows_, nullptr); } diff --git a/velox/functions/prestosql/JsonFunctions.cpp b/velox/functions/prestosql/JsonFunctions.cpp index 8e9cb0e9d4c4d..fbe75d0c7bd24 100644 --- a/velox/functions/prestosql/JsonFunctions.cpp +++ b/velox/functions/prestosql/JsonFunctions.cpp @@ -86,10 +86,10 @@ class JsonParseFunction : public exec::VectorFunction { // validation of JSON syntax that doesn't allocate memory or copy data. assert(args.size() > 0); const auto& arg = args[0]; - static_assert(simdjson::SIMDJSON_PADDING <= AlignedBuffer::kPaddedSize); if (arg->isConstantEncoding()) { auto value = arg->as>()->valueAt(0); - auto parsed = parser_.parse(value.data(), value.size(), false); + // Ask the parser to copy the input in case `value' is inlined. + auto parsed = parser_.parse(value.data(), value.size(), true); if (parsed.error() != simdjson::SUCCESS) { context.setErrors(rows, errors_[parsed.error()]); return; @@ -104,7 +104,8 @@ class JsonParseFunction : public exec::VectorFunction { rows.applyToSelected([&](auto row) { auto value = flatInput->valueAt(row); - auto parsed = parser_.parse(value.data(), value.size(), false); + // Ask the parser to copy the input in case `value' is inlined. + auto parsed = parser_.parse(value.data(), value.size(), true); if (parsed.error() != simdjson::SUCCESS) { context.setVeloxExceptionError(row, errors_[parsed.error()]); }