Skip to content

Commit

Permalink
[ESIMD] Infer address space of pointer that are passed through invoke…
Browse files Browse the repository at this point in the history
…_simd to ESIMD API to generate better code on BE (intel#14528)
  • Loading branch information
fineg74 authored Jul 17, 2024
1 parent 34b8e40 commit 16e39df
Show file tree
Hide file tree
Showing 3 changed files with 161 additions and 0 deletions.
59 changes: 59 additions & 0 deletions llvm/lib/SYCLLowerIR/LowerInvokeSimd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/GenXIntrinsics/GenXMetadata.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
Expand Down Expand Up @@ -259,6 +260,45 @@ void markFunctionAsESIMD(Function *F) {
}
}

void adjustAddressSpace(Function *F, uint32_t ArgNo, uint32_t ArgAddrSpace) {
Argument *Arg = F->getArg(ArgNo);
for (User *ArgUse : Arg->users()) {
Instruction *Instr = dyn_cast<Instruction>(ArgUse);
if (!Instr)
continue;
const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(ArgUse);
if (ASC) {
if (ASC->getDestAddressSpace() == ArgAddrSpace)
continue;
}

const CallInst *CI = dyn_cast<CallInst>(ArgUse);
if (CI) {
Function *Callee = CI->getCalledFunction();
if (!Callee || Callee->isDeclaration())
continue;

for (uint32_t i = 0; i < CI->getNumOperands(); ++i) {
if (CI->getOperand(i) == Arg) {
adjustAddressSpace(Callee, i, ArgAddrSpace);
}
}
} else {
for (unsigned int i = 0; i < ArgUse->getNumOperands(); ++i) {
if (ArgUse->getOperand(i) == Arg) {
PointerType *NPT = PointerType::get(Arg->getContext(), ArgAddrSpace);

auto *NewInstr = new AddrSpaceCastInst(ArgUse->getOperand(i), NPT);
NewInstr->insertBefore(Instr);
NewInstr->setDebugLoc(Instr->getDebugLoc());

ArgUse->setOperand(i, NewInstr);
}
}
}
}
}

// Process 'invoke_simd(sub_group_obj, f, spmd_args...);' call.
//
// If f is a function name or a function pointer, this call is lowered into
Expand Down Expand Up @@ -319,6 +359,25 @@ bool processInvokeSimdCall(CallInst *InvokeSimd,
SimdF->addFnAttr(INVOKE_SIMD_DIRECT_TARGET_ATTR);
}

if (!SimdF->isDeclaration()) {
// The real arguments for invoke_simd callee start at index 2.
for (uint32_t i = 2; i < InvokeSimd->arg_size(); ++i) {
const Value *Arg = InvokeSimd->getArgOperand(i);
if (Arg->getType()->isPointerTy()) {
uint32_t AddressSpace = Arg->getType()->getPointerAddressSpace();
if (AddressSpace == 4) {
const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(Arg);
if (!ASC)
continue;

AddressSpace =
ASC->getOperand(0)->getType()->getPointerAddressSpace();
}
adjustAddressSpace(SimdF, i - 2, AddressSpace);
}
}
}

// The invoke_simd target is known at compile-time - optimize.
// 1. find the call to f within the cloned helper - it is its first parameter
constexpr unsigned SimdCallTargetArgNo = 0;
Expand Down
16 changes: 16 additions & 0 deletions sycl/test-e2e/ESIMD/PerformanceTests/invoke_simd_smoke.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
//==------------- invoke_simd_smoke.cpp - DPC++ ESIMD on-device test----==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// REQUIRES: gpu-intel-dg2 && level_zero
// UNSUPPORTED: windows

// RUN: mkdir -p %t.dir && %{build} -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr -o %t.dir/exec.out
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 IGC_DumpToCustomDir=%t.dir IGC_ShaderDumpEnable=1 %{run} %t.dir/exec.out
// RUN: python3 %S/instruction_count.py %t.dir 149 OCL_asmc2becd046944fa5f_simd16_entry_0001.asm
// RUN: echo "Baseline from driver version 1.3.29735"

#include "../../InvokeSimd/invoke_simd_smoke.cpp"
86 changes: 86 additions & 0 deletions sycl/test/invoke_simd/invoke_simd_address_space_inferral.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// RUN: %clangxx -fsycl -fsycl-device-only -Xclang -fsycl-allow-func-ptr -S %s -o %t.ll
// RUN: sycl-post-link -O2 -device-globals -properties -spec-const=native -split=auto -emit-only-kernels-as-entry-points -emit-param-info -symbols -emit-exported-symbols -emit-imported-symbols -lower-esimd -S %t.ll -o %t.table
// RUN: FileCheck %s -input-file=%t_0.ll

// The test validates proper address space inferral for a pointer passed to
// invoke_simd callee that is used for ESIMD API memory API

#include <sycl/detail/core.hpp>
#include <sycl/ext/intel/esimd.hpp>
#include <sycl/ext/oneapi/experimental/invoke_simd.hpp>
#include <sycl/ext/oneapi/experimental/uniform.hpp>
#include <sycl/usm.hpp>

#include <functional>
#include <iostream>
#include <type_traits>

using namespace sycl::ext::oneapi::experimental;
using namespace sycl;
namespace esimd = sycl::ext::intel::esimd;

constexpr int VL = 32;

__attribute__((always_inline)) void ESIMD_CALLEE(float *A, float *B,
int i) SYCL_ESIMD_FUNCTION {
esimd::simd<float, VL> a;
a.copy_from(A + i);
a.copy_to(B + i);
}

[[intel::device_indirectly_callable]] SYCL_EXTERNAL void __regcall SIMD_CALLEE1(
float *A, float *B, int i) SYCL_ESIMD_FUNCTION {
ESIMD_CALLEE(A, B, i);
}
bool test() {
constexpr unsigned Size = 1024;
constexpr unsigned GroupSize = 4 * VL;

queue q;

auto dev = q.get_device();
float *A = malloc_shared<float>(Size, q);

sycl::range<1> GlobalRange{Size};
// Number of workitems in each workgroup.
sycl::range<1> LocalRange{GroupSize};

sycl::nd_range<1> Range(GlobalRange, LocalRange);

try {
auto e = q.submit([&](handler &cgh) {
local_accessor<float, 1> LocalAcc(Size, cgh);
cgh.parallel_for(Range, [=](nd_item<1> item) [[intel::reqd_sub_group_size(
VL)]] {
sycl::group<1> g = item.get_group();
sycl::sub_group sg = item.get_sub_group();

unsigned int i = g.get_group_id() * g.get_local_range() +
sg.get_group_id() * sg.get_max_local_range();

invoke_simd(
sg, SIMD_CALLEE1, uniform{A},
uniform{LocalAcc.template get_multi_ptr<access::decorated::yes>()
.get()},
uniform{i});
});
});
e.wait();
} catch (sycl::exception const &e) {
std::cout << "SYCL exception caught: " << e.what() << '\n';
sycl::free(A, q);
return false;
}

sycl::free(A, q);

return 0;
// CHECK: addrspacecast ptr addrspace(4) %A to ptr addrspace(1)
// CHECK: addrspacecast ptr addrspace(4) %B to ptr addrspace(3)
}

int main() {
test();

return 0;
}

0 comments on commit 16e39df

Please sign in to comment.