From 30bf2dd8d198b04c99331aa6d4e389af66be3dbc Mon Sep 17 00:00:00 2001 From: Subhankar Shah Date: Sun, 15 Sep 2024 23:16:39 -0700 Subject: [PATCH] [XLA:TPU] Add LoopOptimizerBestFitHeap class that models alternate memory for memory bound loops and accounts for fragmentation. PiperOrigin-RevId: 675017830 --- xla/service/memory_space_assignment/BUILD | 2 + .../memory_bound_loop_optimizer.cc | 253 ++++++++++++++++++ .../memory_bound_loop_optimizer.h | 152 +++++++++++ .../memory_bound_loop_optimizer_test.cc | 181 +++++++++++++ 4 files changed, 588 insertions(+) diff --git a/xla/service/memory_space_assignment/BUILD b/xla/service/memory_space_assignment/BUILD index ab742bdfe6c07..5b9bfb46df578 100644 --- a/xla/service/memory_space_assignment/BUILD +++ b/xla/service/memory_space_assignment/BUILD @@ -426,6 +426,8 @@ cc_library( "//xla/service:hlo_buffer", "//xla/service:hlo_proto_cc", "//xla/service:hlo_value", + "//xla/service/heap_simulator", + "//xla/service/heap_simulator:allocation_block", "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", diff --git a/xla/service/memory_space_assignment/memory_bound_loop_optimizer.cc b/xla/service/memory_space_assignment/memory_bound_loop_optimizer.cc index 9fadc9e60ad1b..9c4b1a2e8bd39 100644 --- a/xla/service/memory_space_assignment/memory_bound_loop_optimizer.cc +++ b/xla/service/memory_space_assignment/memory_bound_loop_optimizer.cc @@ -42,6 +42,8 @@ limitations under the License. 
#include "xla/hlo/ir/hlo_opcode.h" #include "xla/hlo/utils/hlo_live_range.h" #include "xla/service/buffer_value.h" +#include "xla/service/heap_simulator/allocation_block.h" +#include "xla/service/heap_simulator/heap_simulator.h" #include "xla/service/hlo.pb.h" #include "xla/service/hlo_alias_analysis.h" #include "xla/service/hlo_buffer.h" @@ -71,6 +73,257 @@ std::optional GetInstructionIndex( } // namespace +void LoopOptimizerBestFitHeap::CreateBufferInterval( + const AllocationBlock& allocation_block, + const AllocationBlock* colocated_with) { + buffer_intervals_[&allocation_block] = + BufferInterval({&allocation_block, + allocation_block.size, + allocation_block.inclusive_start_time, + allocation_block.end_time, + {}, + colocated_with == nullptr}); + if (colocated_with) { + buffer_intervals_[colocated_with].colocations.push_back(&allocation_block); + } +} + +std::optional +LoopOptimizerBestFitHeap::MaybeFindChunkCandidate( + const AllocationBlock& allocation_block, int64_t preferred_offset) { + Chunk chunk_candidate = FindChunkCandidate( + buffer_intervals_[&allocation_block], preferred_offset); + if (chunk_candidate.chunk_end() <= size_limit_per_heap_) { + return chunk_candidate; + } + return std::nullopt; +} + +std::optional +LoopOptimizerBestFitHeap::FindAndCommitChunkCandidate( + const AllocationBlock& allocation_block, int64_t preferred_offset) { + std::optional chunk = + MaybeFindChunkCandidate(allocation_block, preferred_offset); + if (chunk.has_value()) { + CommitChunk(buffer_intervals_[&allocation_block], chunk.value()); + } + return chunk; +} + +void LoopOptimizerBestFitHeap::RemoveChunk(int64_t start_time, int64_t end_time, + Chunk chunk) { + CHECK(interval_tree_.Remove(start_time, end_time, chunk)); +} + +void LoopOptimizerBestFitHeap::RemoveEvenChunks( + int64_t begin_idx_in_loop, int64_t end_idx_in_loop, + std::optional& chunk) { + RemoveChunk(begin_idx_in_loop, end_idx_in_loop, chunk.value()); + RemoveChunk(begin_idx_in_loop + 2 * loop_size_, + 
end_idx_in_loop + 2 * loop_size_, chunk.value()); +} + +void LoopOptimizerBestFitHeap::RemoveOddChunks( + int64_t begin_idx_in_loop, int64_t end_idx_in_loop, + std::optional& chunk) { + RemoveChunk(begin_idx_in_loop + loop_size_, end_idx_in_loop + loop_size_, + chunk.value()); + RemoveChunk(begin_idx_in_loop + 3 * loop_size_, + end_idx_in_loop + 3 * loop_size_, chunk.value()); +} + +void LoopOptimizerBestFitHeap::RemoveEvenOddChunkPair( + int64_t begin_idx_in_loop, int64_t end_idx_in_loop, + EvenOddChunkPair& chunks) { + CheckAllocationIntervalValid(begin_idx_in_loop, end_idx_in_loop); + ShiftAllocationIntervalIfRequired(begin_idx_in_loop, end_idx_in_loop); + auto [even_chunk, odd_chunk] = chunks; + RemoveEvenChunks(begin_idx_in_loop, end_idx_in_loop, even_chunk); + RemoveOddChunks(begin_idx_in_loop, end_idx_in_loop, odd_chunk); +} + +const AllocationBlock& LoopOptimizerBestFitHeap::GetAllocationBlock( + int64_t start_time, int64_t end_time, int64_t size) { + allocation_blocks_.push_back( + {start_time, end_time, size, static_cast(-1), + static_cast(-1), + static_cast(allocation_blocks_.size())}); + return allocation_blocks_.back(); +} + +const AllocationBlock& LoopOptimizerBestFitHeap::CreateEvenAllocationBlock( + int64_t begin_idx_in_loop, int64_t end_idx_in_loop, int64_t size) { + const AllocationBlock& first_allocation_block = + GetAllocationBlock(begin_idx_in_loop, end_idx_in_loop, size); + CreateBufferInterval(first_allocation_block); + const AllocationBlock& second_allocation_block = + GetAllocationBlock(begin_idx_in_loop + 2 * loop_size_, + end_idx_in_loop + 2 * loop_size_, size); + CreateBufferInterval(second_allocation_block, &first_allocation_block); + return first_allocation_block; +} + +const AllocationBlock& LoopOptimizerBestFitHeap::CreateOddAllocationBlock( + int64_t begin_idx_in_loop, int64_t end_idx_in_loop, int64_t size) { + const AllocationBlock& first_allocation_block = GetAllocationBlock( + begin_idx_in_loop + loop_size_, end_idx_in_loop + 
loop_size_, size); + CreateBufferInterval(first_allocation_block); + const AllocationBlock& second_allocation_block = + GetAllocationBlock(begin_idx_in_loop + 3 * loop_size_, + end_idx_in_loop + 3 * loop_size_, size); + CreateBufferInterval(second_allocation_block, &first_allocation_block); + return first_allocation_block; +} + +void LoopOptimizerBestFitHeap::CheckAllocationIntervalValid( + int64_t begin_idx_in_loop, int64_t end_idx_in_loop) const { + CHECK_LE(begin_idx_in_loop, end_idx_in_loop); + CHECK_LE(-1 * loop_size_, begin_idx_in_loop); + CHECK_LT(begin_idx_in_loop, loop_size_); + CHECK_LE(0, end_idx_in_loop); + CHECK_LT(end_idx_in_loop, 2 * loop_size_); + CHECK_LE(end_idx_in_loop - begin_idx_in_loop + 1, 2 * loop_size_); +} + +void LoopOptimizerBestFitHeap::ShiftAllocationIntervalIfRequired( + int64_t& begin_idx_in_loop, int64_t& end_idx_in_loop) const { + if (begin_idx_in_loop < 0) { + begin_idx_in_loop += loop_size_; + end_idx_in_loop += loop_size_; + } +} + +EvenOddChunkPair LoopOptimizerBestFitHeap::FindEvenAndOddAllocationBetween( + int64_t begin_idx_in_loop, int64_t end_idx_in_loop, int64_t size, + std::pair preferred_offsets) { + CheckAllocationIntervalValid(begin_idx_in_loop, end_idx_in_loop); + ShiftAllocationIntervalIfRequired(begin_idx_in_loop, end_idx_in_loop); + auto [even_offset, odd_offset] = preferred_offsets; + const AllocationBlock& even_allocation = + CreateEvenAllocationBlock(begin_idx_in_loop, end_idx_in_loop, size); + const AllocationBlock& odd_allocation = + CreateOddAllocationBlock(begin_idx_in_loop, end_idx_in_loop, size); + // We need to commit the even chunk because even and odd chunks might overlap + // in time. 
+ std::optional even_chunk = + FindAndCommitChunkCandidate(even_allocation, even_offset); + if (!even_chunk.has_value()) { + return {std::nullopt, std::nullopt}; + } + std::optional odd_chunk = + MaybeFindChunkCandidate(odd_allocation, odd_offset); + RemoveEvenChunks(begin_idx_in_loop, end_idx_in_loop, even_chunk); + if (odd_chunk.has_value()) { + return {even_chunk, odd_chunk}; + } + return {std::nullopt, std::nullopt}; +} + +EvenOddChunkPair LoopOptimizerBestFitHeap::AllocateEvenAndOddBetween( + int64_t begin_idx_in_loop, int64_t end_idx_in_loop, int64_t size, + std::pair preferred_offsets) { + CheckAllocationIntervalValid(begin_idx_in_loop, end_idx_in_loop); + ShiftAllocationIntervalIfRequired(begin_idx_in_loop, end_idx_in_loop); + auto [even_offset, odd_offset] = preferred_offsets; + const AllocationBlock& even_allocation = + CreateEvenAllocationBlock(begin_idx_in_loop, end_idx_in_loop, size); + const AllocationBlock& odd_allocation = + CreateOddAllocationBlock(begin_idx_in_loop, end_idx_in_loop, size); + // We need to commit the even chunk because even and odd chunks might overlap + // in time. + std::optional even_chunk = + FindAndCommitChunkCandidate(even_allocation, even_offset); + if (!even_chunk.has_value()) { + return {std::nullopt, std::nullopt}; + } + std::optional odd_chunk = + FindAndCommitChunkCandidate(odd_allocation, odd_offset); + if (odd_chunk.has_value()) { + return {even_chunk, odd_chunk}; + } + // Remove even chunk if odd chunk was not found. 
+ RemoveEvenChunks(begin_idx_in_loop, end_idx_in_loop, even_chunk); + return {std::nullopt, std::nullopt}; +} + +const AllocationBlock& +LoopOptimizerBestFitHeap::CreateSameEvenAndOddAllocationBlock( + int64_t begin_idx_in_loop, int64_t end_idx_in_loop, int64_t size) { + const AllocationBlock& first_allocation_block = + GetAllocationBlock(begin_idx_in_loop, end_idx_in_loop, size); + CreateBufferInterval(first_allocation_block); + const AllocationBlock& second_allocation_block = + GetAllocationBlock(begin_idx_in_loop + 1 * loop_size_, + end_idx_in_loop + 1 * loop_size_, size); + CreateBufferInterval(second_allocation_block, &first_allocation_block); + const AllocationBlock& third_allocation_block = + GetAllocationBlock(begin_idx_in_loop + 2 * loop_size_, + end_idx_in_loop + 2 * loop_size_, size); + CreateBufferInterval(third_allocation_block, &first_allocation_block); + const AllocationBlock& fourth_allocation_block = + GetAllocationBlock(begin_idx_in_loop + 3 * loop_size_, + end_idx_in_loop + 3 * loop_size_, size); + CreateBufferInterval(fourth_allocation_block, &first_allocation_block); + return first_allocation_block; +} + +EvenOddChunkPair LoopOptimizerBestFitHeap::FindSameEvenAndOddAllocationBetween( + int64_t begin_idx_in_loop, int64_t end_idx_in_loop, int64_t size, + int64_t preferred_offset) { + CheckAllocationIntervalValid(begin_idx_in_loop, end_idx_in_loop); + ShiftAllocationIntervalIfRequired(begin_idx_in_loop, end_idx_in_loop); + // An allocation that is colocated in even and odd iterations cannot be double + // buffered (i.e. it should span less than or equal to one loop iteration). 
+ CHECK_LE(end_idx_in_loop - begin_idx_in_loop + 1, loop_size_); + const AllocationBlock& allocation = CreateSameEvenAndOddAllocationBlock( + begin_idx_in_loop, end_idx_in_loop, size); + std::optional chunk = + MaybeFindChunkCandidate(allocation, preferred_offset); + return {chunk, chunk}; +} + +EvenOddChunkPair LoopOptimizerBestFitHeap::AllocateSameEvenAndOddBetween( + int64_t begin_idx_in_loop, int64_t end_idx_in_loop, int64_t size, + int64_t preferred_offset) { + CheckAllocationIntervalValid(begin_idx_in_loop, end_idx_in_loop); + ShiftAllocationIntervalIfRequired(begin_idx_in_loop, end_idx_in_loop); + // An allocation that is colocated in even and odd iterations cannot be double + // buffered (i.e. it should span less than or equal to one loop iteration). + CHECK_LE(end_idx_in_loop - begin_idx_in_loop + 1, loop_size_); + const AllocationBlock& allocation = CreateSameEvenAndOddAllocationBlock( + begin_idx_in_loop, end_idx_in_loop, size); + std::optional chunk = + FindAndCommitChunkCandidate(allocation, preferred_offset); + return {chunk, chunk}; +} + +std::string LoopOptimizerBestFitHeap::MemoryUsageToAsciiArt( + int64_t begin_iteration, int64_t end_iteration) const { + CHECK_LE(0, begin_iteration); + CHECK_LE(begin_iteration, end_iteration); + return interval_tree_.NodesOverlappingInTimeToAsciiArt( + loop_size_ * begin_iteration, loop_size_ * (end_iteration + 1) - 1, + loop_size_); +} + +std::vector LoopOptimizerBestFitHeap::RemainingMemoryByTime() const { + // Only 2nd and 3rd iterations have the correct (and identical) memory usage. + // 1st and 4th iterations serve only to model the boundary conditions. 
+ std::vector memory_used_by_time = + interval_tree_.MemoryUsedInInterval(loop_size_ * 2, loop_size_ * 3 - 1); + std::vector remaining_memory_by_time(loop_size_); + for (int i = 0; i < loop_size_; ++i) { + remaining_memory_by_time[i] = size_limit_per_heap_ - memory_used_by_time[i]; + } + return remaining_memory_by_time; +} + +int64_t LoopOptimizerBestFitHeap::LastMemoryOffsetOccupied() const { + // 2nd and 3rd iterations will suffice for getting the current alternate + // memory size. + return interval_tree_.HeapSizeInInterval(loop_size_ * 2, loop_size_ * 4 - 1); +} + /*static*/ absl::StatusOr> MemoryBoundLoopOptimizer::Create( int loop_start, int loop_end, uint64_t alternate_memory_size, diff --git a/xla/service/memory_space_assignment/memory_bound_loop_optimizer.h b/xla/service/memory_space_assignment/memory_bound_loop_optimizer.h index 002ece417ca5c..b18e15583f524 100644 --- a/xla/service/memory_space_assignment/memory_bound_loop_optimizer.h +++ b/xla/service/memory_space_assignment/memory_bound_loop_optimizer.h @@ -17,6 +17,7 @@ limitations under the License. #define XLA_SERVICE_MEMORY_SPACE_ASSIGNMENT_MEMORY_BOUND_LOOP_OPTIMIZER_H_ #include +#include #include #include #include @@ -30,6 +31,8 @@ limitations under the License. #include "xla/hlo/ir/hlo_instruction.h" #include "xla/hlo/utils/hlo_live_range.h" #include "xla/service/buffer_value.h" +#include "xla/service/heap_simulator/allocation_block.h" +#include "xla/service/heap_simulator/heap_simulator.h" #include "xla/service/hlo.pb.h" #include "xla/service/hlo_alias_analysis.h" #include "xla/service/hlo_buffer.h" @@ -44,6 +47,155 @@ limitations under the License. namespace xla { namespace memory_space_assignment { +// Pair of chunks for even and odd loop iterations. +using EvenOddChunkPair = std::pair, + std::optional>; + +// LoopOptimizerBestFitHeap extends GlobalDecreasingSizeBestFitHeap to track +// allocated buffers and their live intervals for the MemoryBoundLoopOptimizer. 
+// * We model 4 loop iterations. +// * The 0th and 2nd iterations are even. The 1st and 3rd are odd. +// * Allocations in even iterations are required to have the same offsets. +// Likewise, allocations in odd iterations are required to have the same +// offset. +// * Allocations may have different offsets between odd and even iterations. +// * Buffers can span up to 2 iterations. +// * The algorithm uses the 0th and 1st iterations to account for buffers that +// start in those iterations but are still alive in the 2nd and 3rd +// iterations. The 2nd and 3rd iterations are used to give the complete loop +// buffer picture. +class LoopOptimizerBestFitHeap + : public GlobalDecreasingSizeBestFitHeap { + public: + explicit LoopOptimizerBestFitHeap(uint64_t size_limit_per_heap, + int64_t loop_size, + int64_t alignment_in_bytes) + : GlobalDecreasingSizeBestFitHeap(alignment_in_bytes), + size_limit_per_heap_(size_limit_per_heap), + loop_size_(loop_size) {} + ~LoopOptimizerBestFitHeap() override = default; + + // Frees the memory space denoted by chunk from [begin_idx_in_loop, + // end_idx_in_loop] from all iterations. + void RemoveEvenOddChunkPair(int64_t begin_idx_in_loop, + int64_t end_idx_in_loop, + EvenOddChunkPair& chunks); + + // Displays the current memory usage vs time for 6 loop iterations by default. + // Note: The 0th and the 1st iterations are just to account for loop around + // for buffers that go across one or two loop boundaries. The 2nd and the 3rd + // iterations present the actual memory view of the allocation. The 4th and + // 5th iterations show buffers from previous two iterations that go across one + // or two loop boundaries. begin_iteration and end_iteration are both + // inclusive, 0 indexed. + std::string MemoryUsageToAsciiArt(int64_t begin_iteration = 0, + int64_t end_iteration = 5) const; + + // Returns a vector of size loop_size, where the i'th element denotes the + // available (unfragmented) alternate memory in bytes at loop_idx i. 
+ std::vector RemainingMemoryByTime() const; + + // Returns an integer denoting the largest occupied memory location in the + // alternate memory. + int64_t LastMemoryOffsetOccupied() const; + + // Finds free memory chunks of size "size" between [begin_idx_in_loop, + // end_idx_in_loop] in the even and odd loop iterations, only if free chunks + // are found in both iterations. The even and odd iteration offsets may be + // different. + EvenOddChunkPair FindEvenAndOddAllocationBetween( + int64_t begin_idx_in_loop, int64_t end_idx_in_loop, int64_t size, + std::pair preferred_offsets = {-1, -1}); + + // Finds and reserves free memory chunks of size "size" between + // [begin_idx_in_loop, end_idx_in_loop] in the even and odd loop iterations, + // only if free chunks are found in both iterations. The even and odd + // iteration offsets may be different. + EvenOddChunkPair AllocateEvenAndOddBetween( + int64_t begin_idx_in_loop, int64_t end_idx_in_loop, int64_t size, + std::pair preferred_offsets = {-1, -1}); + + // Finds free memory chunks of size "size" between [begin_idx_in_loop, + // end_idx_in_loop] in the even and odd loop iterations, only if free chunks + // are found in both iterations. The even and odd iteration offsets are same. + EvenOddChunkPair FindSameEvenAndOddAllocationBetween( + int64_t begin_idx_in_loop, int64_t end_idx_in_loop, int64_t size, + int64_t preferred_offset = -1); + + // Finds and reserves free memory chunks of size "size" between + // [begin_idx_in_loop, end_idx_in_loop] in the even and odd loop iterations, + // only if free chunks are found in both iterations. The even and odd + // iteration offsets are same. 
+ EvenOddChunkPair AllocateSameEvenAndOddBetween(int64_t begin_idx_in_loop, + int64_t end_idx_in_loop, + int64_t size, + int64_t preferred_offset = -1); + + private: + // REQUIRES: + // - begin_idx_in_loop <= end_idx_in_loop + // - begin_idx_in_loop is within [-loop_size, loop_size) + // - end_idx_in_loop is within [0, 2 * loop_size) + // - end_idx_in_loop - begin_idx_in_loop + 1 <= 2 * loop_size (allocation + // colocated in even (or odd) iterations cannot span more than 2 loop + // iterations) + void CheckAllocationIntervalValid(int64_t begin_idx_in_loop, + int64_t end_idx_in_loop) const; + + // Shifts allocation interval at [begin_idx_in_loop, end_idx_in_loop] to + // [begin_idx_in_loop + loop_size, end_idx_in_loop + loop_size], if + // begin_idx_in_loop is negative. + void ShiftAllocationIntervalIfRequired(int64_t& begin_idx_in_loop, + int64_t& end_idx_in_loop) const; + + // Returns a reference to a newly created allocation block that is added to + // allocation_blocks_. + const AllocationBlock& GetAllocationBlock(int64_t start_time, + int64_t end_time, int64_t size); + + // Creates a BufferInterval corresponding to the AllocationBlock and adds it + // to buffer_intervals_. 
+ void CreateBufferInterval(const AllocationBlock& allocation_block, + const AllocationBlock* colocated_with = nullptr); + + std::optional MaybeFindChunkCandidate( + const AllocationBlock& allocation_block, int64_t preferred_offset = -1); + + std::optional FindAndCommitChunkCandidate( + const AllocationBlock& allocation_block, int64_t preferred_offset = -1); + + void RemoveChunk(int64_t start_time, int64_t end_time, Chunk chunk); + + void RemoveEvenChunks(int64_t begin_idx_in_loop, int64_t end_idx_in_loop, + std::optional& chunk); + + void RemoveOddChunks(int64_t begin_idx_in_loop, int64_t end_idx_in_loop, + std::optional& chunk); + + // Creates colocated allocation blocks for loop interval [begin_idx_in_loop, + // end_idx_in_loop] in 0th, 1st, 2nd and 3rd loop iterations and returns one + // AllocationBlock. + const AllocationBlock& CreateSameEvenAndOddAllocationBlock( + int64_t begin_idx_in_loop, int64_t end_idx_in_loop, int64_t size); + + // Creates colocated allocation blocks for loop interval [begin_idx_in_loop, + // end_idx_in_loop] in 0th and 2nd loop iterations and returns one + // AllocationBlock. + const AllocationBlock& CreateEvenAllocationBlock(int64_t begin_idx_in_loop, + int64_t end_idx_in_loop, + int64_t size); + // Creates colocated allocation blocks for loop interval [begin_idx_in_loop, + // end_idx_in_loop] in 1st and 3rd loop iterations and returns one + // AllocationBlock. + const AllocationBlock& CreateOddAllocationBlock(int64_t begin_idx_in_loop, + int64_t end_idx_in_loop, + int64_t size); + + uint64_t size_limit_per_heap_; + int64_t loop_size_; + std::list allocation_blocks_; +}; + // An optimizer for unrolled memory-bound loops. It keeps track of alternate // memory capacity and default memory bandwidth to decide the allocations of // each tensor within a loop iteration. 
The assumption is that all of the diff --git a/xla/service/memory_space_assignment/memory_bound_loop_optimizer_test.cc b/xla/service/memory_space_assignment/memory_bound_loop_optimizer_test.cc index 1dff5221026f8..a5df6154afd34 100644 --- a/xla/service/memory_space_assignment/memory_bound_loop_optimizer_test.cc +++ b/xla/service/memory_space_assignment/memory_bound_loop_optimizer_test.cc @@ -66,6 +66,8 @@ namespace xla { namespace memory_space_assignment { namespace { +using ::testing::ContainerEq; +using ::testing::HasSubstr; constexpr int64_t kPointerSize = 8; int64_t ShapeSize(const Shape& shape) { @@ -84,6 +86,185 @@ int64_t ReservedScopedMemoryFn( return 0; } +class LoopOptimizerBestFitHeapTest : public ::testing::Test { + public: + LoopOptimizerBestFitHeapTest() + : heap_(/*size_limit_per_heap=*/64, /*loop_size=*/6, + /*alignment_in_bytes=*/8) {} + + bool IsAllocateSameEvenAndOddBetweenSuccessful(int64_t begin_idx_in_loop, + int64_t end_idx_in_loop, + int64_t size) { + EvenOddChunkPair chunks = heap_.AllocateSameEvenAndOddBetween( + begin_idx_in_loop, end_idx_in_loop, size); + return chunks.first.has_value() && chunks.second.has_value(); + } + + bool CanFindSameEvenAndOddAllocationBetween(int64_t begin_idx_in_loop, + int64_t end_idx_in_loop, + int64_t size) { + EvenOddChunkPair chunks = heap_.FindSameEvenAndOddAllocationBetween( + begin_idx_in_loop, end_idx_in_loop, size); + return chunks.first.has_value() && chunks.second.has_value(); + } + + bool IsAllocateEvenAndOddBetweenSuccessful(int64_t begin_idx_in_loop, + int64_t end_idx_in_loop, + int64_t size) { + EvenOddChunkPair chunks = heap_.AllocateEvenAndOddBetween( + begin_idx_in_loop, end_idx_in_loop, size); + return chunks.first.has_value() && chunks.second.has_value(); + } + + bool CanFindEvenAndOddAllocationBetween(int64_t begin_idx_in_loop, + int64_t end_idx_in_loop, + int64_t size) { + EvenOddChunkPair chunks = heap_.FindEvenAndOddAllocationBetween( + begin_idx_in_loop, end_idx_in_loop, size); + return 
chunks.first.has_value() && chunks.second.has_value(); + } + + std::string GetMemoryUsageAsciiArt() { return heap_.MemoryUsageToAsciiArt(); } + + protected: + LoopOptimizerBestFitHeap heap_; +}; + +TEST_F(LoopOptimizerBestFitHeapTest, TestAllocateSameEvenAndOddBetween) { + EXPECT_TRUE(IsAllocateSameEvenAndOddBetweenSuccessful(3, 8, 16)); + EXPECT_TRUE(IsAllocateSameEvenAndOddBetweenSuccessful(-3, 2, 16)); + EXPECT_TRUE(IsAllocateSameEvenAndOddBetweenSuccessful(0, 2, 16)); + EXPECT_TRUE(IsAllocateSameEvenAndOddBetweenSuccessful(3, 5, 16)); + EXPECT_EQ(heap_.LastMemoryOffsetOccupied(), 48); + EXPECT_TRUE(IsAllocateSameEvenAndOddBetweenSuccessful(0, 5, 16)); + EXPECT_FALSE(IsAllocateSameEvenAndOddBetweenSuccessful(0, 5, 16)); + EXPECT_EQ(heap_.LastMemoryOffsetOccupied(), 64); + EXPECT_THAT(heap_.RemainingMemoryByTime(), + ContainerEq(std::vector{0, 0, 0, 0, 0, 0})); + std::string memory_usage = heap_.MemoryUsageToAsciiArt(2, 3); + // Expected memory usage ascii art string - + // Memory map for time: [12,23], memory_block_size: 16, group_size: 6 + // + // ###### ###### 64 + // ###### ###### 48 + // ###### ###### 32 + // ###### ###### 16 + // 234567 890123 + EXPECT_THAT(memory_usage, HasSubstr("Memory map for time: [12,23], " + "memory_block_size: 16, group_size: 6")); + EXPECT_THAT(memory_usage, HasSubstr("###### ###### 64")); + EXPECT_THAT(memory_usage, HasSubstr("###### ###### 48")); + EXPECT_THAT(memory_usage, HasSubstr("###### ###### 32")); + EXPECT_THAT(memory_usage, HasSubstr("###### ###### 16")); + EXPECT_THAT(memory_usage, HasSubstr("234567 890123")); +} + +TEST_F(LoopOptimizerBestFitHeapTest, TestAllocateEvenAndOddBetween) { + EXPECT_TRUE(IsAllocateEvenAndOddBetweenSuccessful(3, 11, 16)); + EXPECT_EQ(heap_.LastMemoryOffsetOccupied(), 32); + EXPECT_TRUE(IsAllocateEvenAndOddBetweenSuccessful(-3, 8, 16)); + EXPECT_EQ(heap_.LastMemoryOffsetOccupied(), 64); + EXPECT_THAT(heap_.RemainingMemoryByTime(), + ContainerEq(std::vector{16, 16, 16, 0, 0, 0})); + std::string 
memory_usage = heap_.MemoryUsageToAsciiArt(); + // Expected memory usage ascii art string - + // Memory map for time: [0,35], memory_block_size: 16, group_size: 6 + // + // ...... ...### ###### ###### ###### ###... 64 + // ...### ###### ###### ###### ###... ...... 48 + // ...... ...### ###### ...### ###### ...... 32 + // ...### ###### ...### ###### ...... ...... 16 + // 012345 678901 234567 890123 456789 012345 + EXPECT_THAT( + memory_usage, + HasSubstr( + "Memory map for time: [0,35], memory_block_size: 16, group_size: 6")); + EXPECT_THAT(memory_usage, + HasSubstr("...... ...### ###### ###### ###### ###... 64")); + EXPECT_THAT(memory_usage, + HasSubstr("...### ###### ###### ###### ###... ...... 48")); + EXPECT_THAT(memory_usage, + HasSubstr("...... ...### ###### ...### ###### ...... 32")); + EXPECT_THAT(memory_usage, + HasSubstr("...### ###### ...### ###### ...... ...... 16")); + EXPECT_THAT(memory_usage, + HasSubstr("012345 678901 234567 890123 456789 012345")); +} + +TEST_F(LoopOptimizerBestFitHeapTest, TestRemoveChunk) { + EvenOddChunkPair chunks = heap_.AllocateEvenAndOddBetween(3, 11, 16); + EXPECT_TRUE(chunks.first.has_value() && chunks.second.has_value()); + EvenOddChunkPair second_chunks = heap_.AllocateEvenAndOddBetween(-3, 8, 16); + EXPECT_TRUE(second_chunks.first.has_value() && + second_chunks.second.has_value()); + EXPECT_THAT(heap_.RemainingMemoryByTime(), + ContainerEq(std::vector{16, 16, 16, 0, 0, 0})); + EXPECT_EQ(heap_.LastMemoryOffsetOccupied(), 64); + std::string memory_usage = heap_.MemoryUsageToAsciiArt(2, 3); + // Expected memory usage ascii art string - + // Memory map for time: [12,23], memory_block_size: 16, group_size: 6 + // + // ###### ###### 64 + // ###### ###### 48 + // ###### ...### 32 + // ...### ###### 16 + // 234567 890123 + EXPECT_THAT(memory_usage, HasSubstr("Memory map for time: [12,23], " + "memory_block_size: 16, group_size: 6")); + EXPECT_THAT(memory_usage, HasSubstr("###### ###### 64")); + EXPECT_THAT(memory_usage, 
HasSubstr("###### ###### 48")); + EXPECT_THAT(memory_usage, HasSubstr("###### ...### 32")); + EXPECT_THAT(memory_usage, HasSubstr("...### ###### 16")); + EXPECT_THAT(memory_usage, HasSubstr("234567 890123")); + // We must find 16 bytes of free memory in [0,2] with different offsets in even + // and odd iterations. + EXPECT_TRUE(CanFindEvenAndOddAllocationBetween(0, 2, 16)); + // We must not find 16 bytes of free memory in [0,2] with same offsets in even + // and odd iterations. + EXPECT_FALSE(IsAllocateSameEvenAndOddBetweenSuccessful(0, 2, 16)); + EXPECT_FALSE(CanFindEvenAndOddAllocationBetween(0, 11, 16)); + heap_.RemoveEvenOddChunkPair(3, 11, chunks); + // We must find 16 bytes of free memory spanning 2 loop iterations with + // different offsets in even and odd iterations. It does not matter what time + // is picked as the start time as long as the span is less than or equal to 2 + // iterations. + EXPECT_TRUE(CanFindEvenAndOddAllocationBetween(0, 11, 16)); + EXPECT_TRUE(CanFindEvenAndOddAllocationBetween(-3, 8, 16)); + // We must find 32 bytes of free memory spanning less than or equal to one + // iteration with different offsets in even and odd iterations. It does not + // matter what time is picked as the start time. + EXPECT_TRUE(CanFindEvenAndOddAllocationBetween(0, 5, 32)); + EXPECT_TRUE(CanFindEvenAndOddAllocationBetween(-1, 4, 32)); + EXPECT_TRUE(CanFindEvenAndOddAllocationBetween(2, 7, 32)); + // Spans more than one iteration. + EXPECT_FALSE(CanFindEvenAndOddAllocationBetween(0, 6, 32)); + // We must be able to find 32 bytes of free memory spanning less than or equal + // to one iteration with same offsets in even and odd iterations. It does not + // matter what time is picked as the start time. 
+ EXPECT_TRUE(CanFindSameEvenAndOddAllocationBetween(0, 5, 32)); + EXPECT_TRUE(CanFindSameEvenAndOddAllocationBetween(-1, 4, 32)); + EXPECT_TRUE(CanFindSameEvenAndOddAllocationBetween(2, 7, 32)); + std::string updated_memory_usage = heap_.MemoryUsageToAsciiArt(2, 3); + // Expected updated memory usage ascii art string - + // Memory map for time: [12,23], memory_block_size: 16, group_size: 6 + // + // ###### ###### 64 + // ###### ###### 48 + // ...... ...... 32 + // ...... ...... 16 + // 234567 890123 + EXPECT_THAT(updated_memory_usage, + HasSubstr("Memory map for time: [12,23], " + "memory_block_size: 16, group_size: 6")); + EXPECT_THAT(updated_memory_usage, HasSubstr("###### ###### 64")); + EXPECT_THAT(updated_memory_usage, HasSubstr("###### ###### 48")); + EXPECT_THAT(updated_memory_usage, HasSubstr("...... ...... 32")); + EXPECT_THAT(updated_memory_usage, HasSubstr("...... ...... 16")); + EXPECT_THAT(updated_memory_usage, HasSubstr("234567 890123")); + heap_.RemoveEvenOddChunkPair(-3, 8, second_chunks); + EXPECT_EQ(heap_.LastMemoryOffsetOccupied(), 0); +} + class MemoryBoundLoopOptimizerTest : public HloTestBase { public: MemoryBoundLoopOptimizerTest() = default;