Skip to content

Commit a1f7a26

Browse files
aratajew authored and igcbot committed
Fix subroutine handling for intel_reqd_sub_group_size(32)
Previously, using `intel_reqd_sub_group_size(32)` on DG2 resulted in two redundant SIMD32 call instructions being generated in vISA, which could lead to unexpected issues. This change ensures that only a single SIMD32 call instruction is generated. All function arguments and return values are now correctly passed using two SIMD16 instructions, eliminating redundancy and improving… (description truncated)
1 parent ce775e8 commit a1f7a26

File tree

3 files changed

+57
-6
lines changed

3 files changed

+57
-6
lines changed

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -349,9 +349,8 @@ uint EmitPass::DecideInstanceAndSlice(const llvm::BasicBlock &blk, SDAG &sdag, b
349349
}
350350

351351
if (CallInst *callInst = dyn_cast<CallInst>(sdag.m_root)) {
352-
// Disable slicing for function calls
353-
Function *F = dyn_cast<Function>(IGCLLVM::getCalledValue(callInst));
354-
if (!F || F->hasFnAttribute("visaStackCall")) {
352+
// Disable slicing for function calls (stackcalls and subroutines)
353+
if (isUserFunctionCall(callInst)) {
355354
numInstance = 1;
356355
slicing = false;
357356
}
@@ -10712,7 +10711,7 @@ void EmitPass::emitCall(llvm::CallInst *inst) {
1071210711
// When both symbols are the same, then this argument passing has been
1071310712
// lifted to use a global vISA variable, just skip the copy.
1071410713
if (Dst != Src) {
10715-
emitCopyAll(Dst, Src, Arg.getType());
10714+
emitCopyAllInstances(Dst, Src, Arg.getType());
1071610715
}
1071710716
}
1071810717
m_currFuncHasSubroutine = true;
@@ -10723,7 +10722,7 @@ void EmitPass::emitCall(llvm::CallInst *inst) {
1072310722
if (!inst->use_empty()) {
1072410723
CVariable *Dst = GetSymbol(inst);
1072510724
CVariable *Src = m_currShader->getOrCreateReturnSymbol(F);
10726-
emitCopyAll(Dst, Src, inst->getType());
10725+
emitCopyAllInstances(Dst, Src, inst->getType());
1072710726
}
1072810727
}
1072910728

@@ -10742,7 +10741,7 @@ void EmitPass::emitReturn(llvm::ReturnInst *inst) {
1074210741
if (!RetTy->isVoidTy()) {
1074310742
CVariable *Dst = m_currShader->getOrCreateReturnSymbol(F);
1074410743
CVariable *Src = GetSymbol(inst->getReturnValue());
10745-
emitCopyAll(Dst, Src, RetTy);
10744+
emitCopyAllInstances(Dst, Src, RetTy);
1074610745
}
1074710746

1074810747
m_encoder->SubroutineRet(nullptr, F);
@@ -19167,6 +19166,13 @@ void EmitPass::emitCopyAll(CVariable *Dst, CVariable *Src, llvm::Type *Ty) {
1916719166
}
1916819167
}
1916919168

19169+
// Copies Src to Dst once for every instance reported by Dst->GetNumberInstance().
// Before each copy the encoder is told which half to target: instance 1 selects
// the second half, any other instance the first. The copy itself is delegated to
// emitCopyAll with the same Dst, Src and type.
// NOTE(review): after a two-instance copy the encoder is left with the second
// half selected; confirm callers reset it or do not depend on it.
void EmitPass::emitCopyAllInstances(CVariable *Dst, CVariable *Src, llvm::Type *type) {
  for (uint instance = 0; instance < Dst->GetNumberInstance(); ++instance) {
    // The comparison already yields the bool that SetSecondHalf is given.
    m_encoder->SetSecondHalf(instance == 1);
    emitCopyAll(Dst, Src, type);
  }
}
19175+
1917019176
void EmitPass::emitSqrt(Instruction *inst) {
1917119177
GenIntrinsicInst *intrinCall = llvm::cast<GenIntrinsicInst>(inst);
1917219178
CVariable *src0 = GetSymbol(intrinCall->getArgOperand(0));

IGC/Compiler/CISACodeGen/EmitVISAPass.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -439,6 +439,7 @@ class EmitPass : public llvm::FunctionPass {
439439
uint32_t SrcSubRegOffset = 0, bool allowLargerSIMDSize = false, CVariable *predicate = nullptr);
440440
void emitConstantVector(CVariable *Dst, uint64_t value = 0);
441441
void emitCopyAll(CVariable *Dst, CVariable *Src, llvm::Type *Ty);
442+
void emitCopyAllInstances(CVariable *Dst, CVariable *Src, llvm::Type *Ty);
442443

443444
void emitPredicatedVectorCopy(CVariable *Dst, CVariable *Src, CVariable *pred);
444445
void emitPredicatedVectorSelect(CVariable *Dst, CVariable *Src0, CVariable *Src1, CVariable *pred);
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
/*========================== begin_copyright_notice ============================
2+
3+
Copyright (C) 2025 Intel Corporation
4+
5+
SPDX-License-Identifier: MIT
6+
7+
============================= end_copyright_notice ===========================*/
8+
9+
// REQUIRES: dg2-supported
10+
// RUN: ocloc compile -file %s -options "-igc_opts 'DumpVISAASMToConsole=1'" -device dg2 | FileCheck --check-prefix CHECK-VISA %s
11+
12+
// This test verifies that function calls as subroutines are properly handled when
13+
// intel_reqd_sub_group_size is equal to 32 on DG2. The final call instruction must be
14+
// in the SIMD width required by the user (32), but instructions that are responsible
15+
// for passing parameters to the subroutine and returning values from the subroutine
16+
// must be split into two SIMD16 instructions.
17+
18+
// CHECK-VISA: mov (M1, 16) [[A_PARAM_LO:.*]](0,0)<1> {{.*}}(0,0)<0;1,0>
19+
// CHECK-VISA: mov (M5, 16) [[A_PARAM_HI:.*]](0,0)<1> {{.*}}(0,0)<0;1,0>
20+
// CHECK-VISA: mov (M1, 16) [[B_PARAM_LO:.*]](0,0)<1> {{.*}}(0,0)<0;1,0>
21+
// CHECK-VISA: mov (M5, 16) [[B_PARAM_HI:.*]](0,0)<1> {{.*}}(0,0)<0;1,0>
22+
// CHECK-VISA: call (M1, 32) compute_sum_1
23+
// CHECK-VISA: mov (M1, 16) {{.*}}(0,0)<1> [[COMPUTE_SUM_RETVAL_LO:.*]](0,0)<1;1,0>
24+
// CHECK-VISA: mov (M5, 16) {{.*}}(0,0)<1> [[COMPUTE_SUM_RETVAL_HI:.*]](0,0)<1;1,0>
25+
26+
// CHECK-VISA: .function "compute_sum_1"
27+
// CHECK-VISA: compute_sum_1:
28+
// CHECK-VISA: add (M1, 16) [[RESULT_LO:.*]](0,0)<1> [[A_PARAM_LO]](0,0)<1;1,0> [[B_PARAM_LO]](0,0)<1;1,0>
29+
// CHECK-VISA: add (M5, 16) [[RESULT_HI:.*]](0,0)<1> [[A_PARAM_HI]](0,0)<1;1,0> [[B_PARAM_HI]](0,0)<1;1,0>
30+
// CHECK-VISA: mov (M1, 16) [[COMPUTE_SUM_RETVAL_LO]](0,0)<1> [[RESULT_LO]](0,0)<1;1,0>
31+
// CHECK-VISA: mov (M5, 16) [[COMPUTE_SUM_RETVAL_HI]](0,0)<1> [[RESULT_HI]](0,0)<1;1,0>
32+
33+
// Callee under test: returns a + b. The noinline attribute asks the compiler not
// to inline it, so the kernel below reaches it through a call (the checks above
// expect `call (M1, 32) compute_sum_1`).
__attribute__((noinline))
34+
int compute_sum(int a, int b)
35+
{
36+
return a + b;
37+
}
38+
39+
// Kernel entry point for the test. The attribute requires a sub-group size of 32.
// It calls compute_sum on its two scalar arguments and stores the returned value
// in out[0].
__attribute__((intel_reqd_sub_group_size(32)))
40+
kernel void test_sum(global int* out, int x, int y)
41+
{
42+
int result = compute_sum(x, y);
43+
out[0] = result;
44+
}

0 commit comments

Comments
 (0)