From 470a67e745fc2369fa46259660794a40c881f18d Mon Sep 17 00:00:00 2001
From: Jialiang Tan
Date: Sun, 6 Oct 2024 16:11:09 -0700
Subject: [PATCH] Fix Presto serde bug when deserializing large payload

---
 velox/serializers/PrestoSerializer.cpp           | 13 ++++++++++---
 velox/serializers/tests/PrestoSerializerTest.cpp |  9 +++++++++
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/velox/serializers/PrestoSerializer.cpp b/velox/serializers/PrestoSerializer.cpp
index 4036e47912667..6020651c5c472 100644
--- a/velox/serializers/PrestoSerializer.cpp
+++ b/velox/serializers/PrestoSerializer.cpp
@@ -4220,12 +4220,19 @@ void PrestoVectorSerde::deserialize(
     auto compressBuf = folly::IOBuf::create(header.compressedSize);
     source->readBytes(compressBuf->writableData(), header.compressedSize);
     compressBuf->append(header.compressedSize);
+
+    // Process chained uncompressed results IOBufs.
     auto uncompress =
         codec->uncompress(compressBuf.get(), header.uncompressedSize);
-    ByteRange byteRange{
-        uncompress->writableData(), (int32_t)uncompress->length(), 0};
+    std::vector byteRanges;
+    auto* current = uncompress.get();
+    do {
+      byteRanges.push_back(
+          {current->writableData(), (int32_t)current->length(), 0});
+      current = current->next();
+    } while (current != uncompress.get());
     auto uncompressedSource =
-        std::make_unique(std::vector{byteRange});
+        std::make_unique(std::move(byteRanges));
     readTopColumns(
         *uncompressedSource, type, pool, *result, resultOffset, prestoOptions);
   }
diff --git a/velox/serializers/tests/PrestoSerializerTest.cpp b/velox/serializers/tests/PrestoSerializerTest.cpp
index 9f4817a00c76b..d195ca5f79a29 100644
--- a/velox/serializers/tests/PrestoSerializerTest.cpp
+++ b/velox/serializers/tests/PrestoSerializerTest.cpp
@@ -775,6 +775,15 @@ TEST_P(PrestoSerializerTest, basic) {
   testRoundTrip(rowVector);
 }
 
+TEST_P(PrestoSerializerTest, basicLarge) {
+  const vector_size_t numRows = 80'000;
+  auto rowVector = makeRowVector(
+      {makeFlatVector(numRows, [](vector_size_t row) { return row; }),
+       makeFlatVector(
+           numRows, [](vector_size_t row) { return std::string(1024, 'x'); })});
+  testRoundTrip(rowVector);
+}
+
 /// Test serialization of a dictionary vector that adds nulls to the base
 /// vector.
 TEST_P(PrestoSerializerTest, dictionaryWithExtraNulls) {