|
| 1 | +//==--------------------- LinearGraphOptimization.cpp ----------------------==// |
| 2 | +// |
| 3 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | +// See https://llvm.org/LICENSE.txt for license information. |
| 5 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | +// |
| 7 | +//===----------------------------------------------------------------------===// |
| 8 | + |
| 9 | +// Test for the linear graph optimization, which skips creating and tracking |
| 10 | +// UR sync points. The optimization is an internal implementation detail, so |
| 11 | +// it is validated by inspecting private members of exec_graph_impl. The test |
| 12 | +// has two goals: 1) validate that linear partitions in graphs are optimized |
| 13 | +// to avoid using UR sync points; 2) validate that non-linear partitions |
| 14 | +// contain the expected number of sync points. |
| 15 | + |
| 16 | +#include "Common.hpp" |
| 17 | +#include <optional> |
| 18 | + |
| 19 | +using namespace sycl; |
| 20 | +using namespace sycl::ext::oneapi::experimental; |
| 21 | +using namespace sycl::ext::oneapi::experimental::detail; |
| 22 | + |
| 23 | +// Helper to build a linear chain of N kernels on a queue inside graph capture. |
| 24 | +static void BuildLinearChain(queue &Queue, bool IsInOrderQueue, int N) { |
| 25 | + std::optional<sycl::event> Event; |
| 26 | + for (int I = 0; I < N; ++I) { |
| 27 | + if (IsInOrderQueue) { |
| 28 | + experimental::single_task<TestKernel>(Queue, []() {}); |
| 29 | + } else { |
| 30 | + Event = Queue.submit([&](handler &h) { |
| 31 | + if (Event) { |
| 32 | + h.depends_on(*Event); |
| 33 | + } |
| 34 | + h.single_task<TestKernel>([]() {}); |
| 35 | + }); |
| 36 | + } |
| 37 | + } |
| 38 | +} |
| 39 | + |
| 40 | +// Validate linear optimization invariants on an executable graph. |
| 41 | +static void ValidateLinearExec(exec_graph_impl &Impl, int NumLinearChains) { |
| 42 | + EXPECT_EQ(GraphImplTest::NumPartitionsInOrder(Impl), NumLinearChains); |
| 43 | + EXPECT_EQ(GraphImplTest::NumSyncPoints(Impl), 0); |
| 44 | +} |
| 45 | + |
| 46 | +TEST_F(CommandGraphTest, LinearInOrderQueue) { |
| 47 | + sycl::property_list Props{sycl::property::queue::in_order{}}; |
| 48 | + queue InOrderQ{Dev, Props}; |
| 49 | + |
| 50 | + experimental::command_graph<graph_state::modifiable> G{InOrderQ.get_context(), |
| 51 | + InOrderQ.get_device()}; |
| 52 | + G.begin_recording(InOrderQ); |
| 53 | + BuildLinearChain(InOrderQ, /*IsInOrderQueue=*/true, /*N=*/3); |
| 54 | + InOrderQ.submit([&](sycl::handler &cgh) { cgh.host_task([]() {}); }); |
| 55 | + BuildLinearChain(InOrderQ, /*IsInOrderQueue=*/true, /*N=*/4); |
| 56 | + G.end_recording(InOrderQ); |
| 57 | + |
| 58 | + auto Exec = G.finalize(); |
| 59 | + auto &Impl = *getSyclObjImpl(Exec); |
| 60 | + ValidateLinearExec(Impl, /*InOrderPartitions=*/3); |
| 61 | +} |
| 62 | + |
| 63 | +TEST_F(CommandGraphTest, LinearOutOfOrderQueue) { |
| 64 | + // Out-of-order queue but we submit a strict linear dependency chain by |
| 65 | + // adding explicit depends_on between each node to achieve linearity. |
| 66 | + queue OOOQ{Dev}; |
| 67 | + experimental::command_graph<graph_state::modifiable> G{OOOQ.get_context(), |
| 68 | + OOOQ.get_device()}; |
| 69 | + G.begin_recording(OOOQ); |
| 70 | + BuildLinearChain(OOOQ, /*IsInOrderQueue=*/false, /*N=*/6); |
| 71 | + G.end_recording(OOOQ); |
| 72 | + |
| 73 | + auto Exec = G.finalize(); |
| 74 | + auto &Impl = *getSyclObjImpl(Exec); |
| 75 | + ValidateLinearExec(Impl, /*InOrderPartitions=*/1); |
| 76 | +} |
| 77 | + |
| 78 | +// Ensures non-linear graphs are creating and tracking sync points internally |
| 79 | +// for proper scheduling and that the linear optimization is not improperly |
| 80 | +// applied. |
| 81 | +TEST_F(CommandGraphTest, NonLinearOutOfOrderQueue) { |
| 82 | + queue Q{Dev}; |
| 83 | + experimental::command_graph<graph_state::modifiable> G{Q.get_context(), |
| 84 | + Q.get_device()}; |
| 85 | + G.begin_recording(Q); |
| 86 | + // Root node |
| 87 | + event Root = Q.submit([&](handler &h) { h.single_task<TestKernel>([] {}); }); |
| 88 | + // Two parallel branches depending on Root |
| 89 | + event A = Q.submit([&](handler &h) { |
| 90 | + h.depends_on(Root); |
| 91 | + h.single_task<TestKernel>([] {}); |
| 92 | + }); |
| 93 | + event B = Q.submit([&](handler &h) { |
| 94 | + h.depends_on(Root); |
| 95 | + h.single_task<TestKernel>([] {}); |
| 96 | + }); |
| 97 | + // Join node depends on both A and B |
| 98 | + Q.submit([&](handler &h) { |
| 99 | + h.depends_on(A); |
| 100 | + h.depends_on(B); |
| 101 | + h.single_task<TestKernel>([] {}); |
| 102 | + }); |
| 103 | + G.end_recording(Q); |
| 104 | + |
| 105 | + auto Exec = G.finalize(); |
| 106 | + auto &Impl = *getSyclObjImpl(Exec); |
| 107 | + |
| 108 | + const int NumLinear = GraphImplTest::NumPartitionsInOrder(Impl); |
| 109 | + const int NumSyncPoints = GraphImplTest::NumSyncPoints(Impl); |
| 110 | + |
| 111 | + // We should track a sync point per node for a total of 4 |
| 112 | + EXPECT_EQ(NumSyncPoints, 4); |
| 113 | + EXPECT_EQ(NumLinear, 0); |
| 114 | +} |
0 commit comments