From ef025990f91465a2e313b289c5e0e574d4167ab1 Mon Sep 17 00:00:00 2001 From: Orri Erling Date: Tue, 3 Oct 2023 14:02:02 -0700 Subject: [PATCH] Fix cache hit size reporting for CacheInputStream window (#6868) Summary: RAM read size is overreported for hitting file footer cache which is done for tiny windows over a larger CacheInputStream. If the hitting CacheInputStream has a windowintersect the window with the cache entry's region and use the intersection size as the hit size. Pull Request resolved: https://github.com/facebookincubator/velox/pull/6868 Reviewed By: xiaoxmeng, Yuhta Differential Revision: D49870201 Pulled By: oerling fbshipit-source-id: 01788392101785873dd6734c8f295c682c3b3373 --- velox/dwio/common/CacheInputStream.cpp | 15 ++++++++++++--- velox/dwio/dwrf/test/CacheInputTest.cpp | 2 ++ 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/velox/dwio/common/CacheInputStream.cpp b/velox/dwio/common/CacheInputStream.cpp index 9c10c590f096..c9665926a4eb 100644 --- a/velox/dwio/common/CacheInputStream.cpp +++ b/velox/dwio/common/CacheInputStream.cpp @@ -162,15 +162,24 @@ std::vector> makeRanges( return buffers; } } // namespace - void CacheInputStream::loadSync(Region region) { // rawBytesRead is the number of bytes touched. Whether they come // from disk, ssd or memory is itemized in different counters. A process::TraceContext trace("loadSync"); + int64_t hitSize = region.length; + if (window_.has_value()) { + int64_t regionEnd = region.offset + region.length; + int64_t windowStart = region_.offset + window_.value().offset; + int64_t windowEnd = + region_.offset + window_.value().offset + window_.value().length; + hitSize = std::min(windowEnd, regionEnd) - + std::max(windowStart, region.offset); + } + // coalesced read from InputStream removes itself from this count // so as not to double count when the individual parts are // hit. - ioStats_->incRawBytesRead(region.length); + ioStats_->incRawBytesRead(hitSize); prefetchStarted_ = false; do { folly::SemiFuture wait(false); @@ -212,7 +221,7 @@ void CacheInputStream::loadSync(Region region) { } else { // Hit memory cache. if (!entry->getAndClearFirstUseFlag()) { - ioStats_->ramHit().increment(region.length); + ioStats_->ramHit().increment(hitSize); } return; } diff --git a/velox/dwio/dwrf/test/CacheInputTest.cpp b/velox/dwio/dwrf/test/CacheInputTest.cpp index b891e290e9ce..0b841ce3f0c0 100644 --- a/velox/dwio/dwrf/test/CacheInputTest.cpp +++ b/velox/dwio/dwrf/test/CacheInputTest.cpp @@ -495,6 +495,7 @@ TEST_F(CacheTest, window) { auto clone = cacheInput->clone(); clone->Skip(100); clone->setRemainingBytes(kMB); + auto previousRead = ioStats_->rawBytesRead(); EXPECT_TRUE(clone->Next(&buffer, &size)); // Half MB minus the 100 bytes skipped above should be left in the first load // quantum of 8MB. @@ -503,6 +504,7 @@ TEST_F(CacheTest, window) { EXPECT_EQ(kMB / 2 + 100, size); // There should be no more data in the window. EXPECT_FALSE(clone->Next(&buffer, &size)); + EXPECT_EQ(kMB, ioStats_->rawBytesRead() - previousRead); } TEST_F(CacheTest, bufferedInput) {