Skip to content

Commit 5a72fca

Browse files
vmustya authored and fda0 committed
Fix indirect pointer detection for structs in VC
If the pointer is the first member of the struct, the compiler would incorrectly mark it as a byptr argument. The correct way is to mark it as a byvalue argument with is_ptr set to true. (cherry picked from commit e572b3b)
1 parent e4cb0cc commit 5a72fca

File tree

7 files changed

+159
-11
lines changed

7 files changed

+159
-11
lines changed

IGC/AdaptorOCL/ocl_igc_shared/indirect_access_detection/version.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,4 +13,4 @@ SPDX-License-Identifier: MIT
1313

1414
#pragma once
1515

16-
const uint32_t INDIRECT_ACCESS_DETECTION_VERSION = 7;
16+
const uint32_t INDIRECT_ACCESS_DETECTION_VERSION = 8;

IGC/VectorCompiler/igcdeps/src/cmc.cpp

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -380,6 +380,7 @@ void CMKernel::createPointerGlobalAnnotation(const KernelArgInfo &ArgInfo,
380380
const auto BTI = ArgInfo.getBTI();
381381
const auto Size = ArgInfo.getSizeInBytes();
382382
const auto SourceOffset = ArgInfo.getOffsetInArg();
383+
const bool IsLinearization = SourceOffset != 0 || ArgInfo.getArgNo() != Index;
383384

384385
auto PtrAnnotation = std::make_unique<PointerArgumentAnnotation>();
385386

@@ -398,24 +399,24 @@ void CMKernel::createPointerGlobalAnnotation(const KernelArgInfo &ArgInfo,
398399

399400
PreDefinedAttrGetter::ArgAddrMode ZeAddrMode;
400401
if (AddrMode == ArgAddressMode::Bindless) {
401-
IGC_ASSERT(SourceOffset == 0);
402+
IGC_ASSERT(!IsLinearization);
402403
ZeAddrMode = PreDefinedAttrGetter::ArgAddrMode::bindless;
403404
} else if (AddrMode == ArgAddressMode::Stateful) {
404-
IGC_ASSERT(SourceOffset == 0);
405+
IGC_ASSERT(!IsLinearization);
405406
ZeAddrMode = PreDefinedAttrGetter::ArgAddrMode::stateful;
406407
} else {
407408
IGC_ASSERT(AddrMode == ArgAddressMode::Stateless);
408409
ZeAddrMode = PreDefinedAttrGetter::ArgAddrMode::stateless;
409410
}
410411

411-
if (SourceOffset == 0) {
412-
ZEInfoBuilder::addPayloadArgumentByPointer(
413-
m_kernelInfo.m_zePayloadArgs, Offset, Size, Index, ZeAddrMode,
414-
PreDefinedAttrGetter::ArgAddrSpace::global, getZEArgAccessType(Access));
415-
} else { // Pass the argument as by_value with is_ptr = true
412+
if (IsLinearization) { // Pass the argument as by_value with is_ptr = true
416413
IGC_ASSERT(AddrMode == ArgAddressMode::Stateless);
417414
ZEInfoBuilder::addPayloadArgumentByValue(
418415
m_kernelInfo.m_zePayloadArgs, Offset, Size, Index, SourceOffset, true);
416+
} else {
417+
ZEInfoBuilder::addPayloadArgumentByPointer(
418+
m_kernelInfo.m_zePayloadArgs, Offset, Size, Index, ZeAddrMode,
419+
PreDefinedAttrGetter::ArgAddrSpace::global, getZEArgAccessType(Access));
419420
}
420421

421422
if (AddrMode == ArgAddressMode::Stateful)

IGC/VectorCompiler/include/vc/GenXCodeGen/GenXOCLRuntimeInfo.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -66,6 +66,7 @@ class GenXOCLRuntimeInfo : public ModulePass {
6666
enum class AddressModeType { None, Stateful, Bindless, Stateless };
6767

6868
private:
69+
unsigned ArgNo;
6970
unsigned Index;
7071
KindType Kind;
7172
AccessKindType AccessKind;
@@ -82,6 +83,7 @@ class GenXOCLRuntimeInfo : public ModulePass {
8283
KernelArgInfo() = default;
8384

8485
public:
86+
unsigned getArgNo() const { return ArgNo; }
8587
unsigned getIndex() const { return Index; }
8688
KindType getKind() const { return Kind; }
8789
AccessKindType getAccessKind() const { return AccessKind; }

IGC/VectorCompiler/lib/GenXCodeGen/GenXDetectPointerArg.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -112,7 +112,7 @@ bool GenXDetectPointerArg::handleKernel(Function &F) {
112112
NewDescs.resize(F.arg_size(), "");
113113

114114
for (auto *Arg : PointerArgs) {
115-
if (!Arg->getType()->isIntegerTy(64))
115+
if (!Arg->getType()->isIntegerTy(64) && !Arg->getType()->isPointerTy())
116116
continue;
117117

118118
auto ArgNo = Arg->getArgNo();
@@ -409,8 +409,8 @@ void GenXDetectPointerArg::analyzeValue(Value *V) {
409409
WorkList.push(Inst->getOperand(1));
410410
continue;
411411
}
412-
if (isa<GetElementPtrInst>(Inst)) {
413-
WorkList.push(Inst->getOperand(0));
412+
if (auto *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
413+
WorkList.push(GEP->getPointerOperand());
414414
continue;
415415
}
416416

IGC/VectorCompiler/lib/GenXCodeGen/GenXOCLRuntimeInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -297,6 +297,7 @@ KernelArgBuilder::translateArgument(const Argument &Arg) const {
297297
Info.BTI = KM.getBTI(ArgNo);
298298
// For implicit arguments that are byval argument linearization, index !=
299299
// ArgNo in the IR function.
300+
Info.ArgNo = ArgNo;
300301
Info.Index = KM.getArgIndex(ArgNo);
301302
// Linearization arguments have a non-zero offset in the original explicit
302303
// byval arg.
Lines changed: 69 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,69 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2024 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
; RUN: %opt_typed_ptrs %use_old_pass_manager% -GenXDetectPointerArg -march=genx64 -mcpu=XeHPC -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-TYPED-PTRS
10+
; RUN: %opt_opaque_ptrs %use_old_pass_manager% -GenXDetectPointerArg -march=genx64 -mcpu=XeHPC -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-OPAQUE-PTRS
11+
12+
target datalayout = "e-p:64:64-i64:64-n8:16:32:64"
13+
14+
%struct.state = type { i8, i8 addrspace(1) *, float }
15+
16+
@data = internal global <8 x i64> undef, align 64, !spirv.Decorations !0 #0
17+
18+
; Function Attrs: nounwind
19+
declare void @llvm.genx.vstore.v8i64.p0v8i64(<8 x i64>, <8 x i64>*) #1
20+
21+
; Function Attrs: nounwind
22+
define dllexport spir_kernel void @foo(%struct.state* byval(%struct.state) %_arg_, i32 addrspace(1)* %_arg_1, i64 %impl.arg.private.base, i8 %__arg_lin__arg_.0, i8 addrspace(1) *%__arg_lin__arg_.8, float %__arg_lin__arg_.16) #2 {
23+
entry:
24+
%0 = ptrtoint i8 addrspace(1)* %__arg_lin__arg_.8 to i64
25+
%1 = tail call <8 x i64> @llvm.vc.internal.lsc.load.ugm.v8i64.i1.v2i8.i64(i1 true, i8 3, i8 4, i8 5, <2 x i8> zeroinitializer, i64 0, i64 %0, i16 1, i32 0, <8 x i64> undef)
26+
tail call void @llvm.genx.vstore.v8i64.p0v8i64(<8 x i64> %1, <8 x i64>* nonnull @data)
27+
ret void
28+
}
29+
30+
declare <8 x i64> @llvm.vc.internal.lsc.load.ugm.v8i64.i1.v2i8.i64(i1, i8, i8, i8, <2 x i8>, i64, i64, i16, i32, <8 x i64>) #3
31+
32+
attributes #0 = { "VCByteOffset"="0" "VCGlobalVariable" "VCVolatile" "genx_byte_offset"="0" "genx_volatile" }
33+
attributes #1 = { nounwind "target-cpu"="XeHPC" }
34+
attributes #2 = { nounwind "CMGenxMain" "oclrt"="1" "target-cpu"="XeHPC" }
35+
attributes #3 = { "target-cpu"="XeHPC" }
36+
37+
!spirv.Source = !{!0}
38+
!opencl.spir.version = !{!1}
39+
!opencl.ocl.version = !{!2}
40+
!opencl.used.extensions = !{!3}
41+
!opencl.used.optional.core.features = !{!3}
42+
!spirv.Generator = !{!4}
43+
!genx.kernels = !{!5}
44+
!genx.kernel.internal = !{!10}
45+
46+
; CHECK: !genx.kernels = !{![[KERNEL:[0-9]+]]}
47+
; CHECK-TYPED-PTRS: ![[KERNEL]] = !{void (%struct.state*, i32 addrspace(1)*, i64, i8, i8 addrspace(1)*, float)* @foo, !"foo", !{{[0-9]+}}, i32 0, !{{[0-9]+}}, !{{[0-9]+}}, ![[NODE:[0-9]+]], i32 0}
48+
; CHECK-OPAQUE-PTRS: ![[KERNEL]] = !{ptr @foo, !"foo", !{{[0-9]+}}, i32 0, !{{[0-9]+}}, !{{[0-9]+}}, ![[NODE:[0-9]+]], i32 0}
49+
; CHECK: ![[NODE]] = !{!"svmptr_t", !"svmptr_t", !"", !"", !"svmptr_t", !""}
50+
51+
!0 = !{i32 0, i32 100000}
52+
!1 = !{i32 1, i32 2}
53+
!2 = !{i32 1, i32 0}
54+
!3 = !{}
55+
!4 = !{i16 6, i16 14}
56+
!5 = !{void (%struct.state*, i32 addrspace(1)*, i64, i8, i8 addrspace(1) *, float)* @foo, !"foo", !6, i32 0, !7, !8, !9, i32 0}
57+
!6 = !{i32 112, i32 0, i32 96, i32 104, i32 104, i32 104}
58+
!7 = !{i32 -1, i32 96, i32 64, i32 72, i32 80, i32 88}
59+
!8 = !{i32 0, i32 0}
60+
!9 = !{!"svmptr_t", !"svmptr_t"}
61+
!10 = !{void (%struct.state*, i32 addrspace(1)*, i64, i8, i8 addrspace(1) *, float)* @foo, !11, !12, !13, null}
62+
!11 = !{i32 0, i32 0, i32 0, i32 0, i32 8, i32 16}
63+
!12 = !{i32 0, i32 1, i32 2, i32 0, i32 0, i32 0}
64+
!13 = !{!14}
65+
!14 = !{i32 0, !15}
66+
!15 = !{!16, !17, !18}
67+
!16 = !{i32 3, i32 0}
68+
!17 = !{i32 4, i32 8}
69+
!18 = !{i32 5, i32 16}
Lines changed: 75 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,75 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2024 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
; REQUIRES: regkeys, pvc-supported
10+
; RUN: llvm-as %s -o %t.bc
11+
; RUN: ocloc -device pvc -llvm_input -options "-vc-codegen -ze-collect-cost-info -igc_opts 'ShaderDumpEnable=1, DumpToCustomDir=%t'" -output_no_suffix -file %t.bc
12+
; RUN: cat %t/*.zeinfo | FileCheck %s
13+
14+
target datalayout = "e-p:64:64-i64:64-n8:16:32:64"
15+
16+
%struct.state = type { i8, i8 addrspace(1) *, float }
17+
18+
@data = internal global <8 x i64> undef, align 64, !spirv.Decorations !0 #0
19+
20+
; Function Attrs: nounwind
21+
declare void @llvm.genx.vstore.v8i64.p0v8i64(<8 x i64>, <8 x i64>*)
22+
23+
; CHECK: payload_arguments:
24+
; CHECK: - arg_type: arg_bypointer
25+
; CHECK-NEXT: offset: 40
26+
; CHECK-NEXT: size: 8
27+
; CHECK-NEXT: arg_index: 1
28+
; CHECK-NEXT: addrmode: stateless
29+
; CHECK-NEXT: addrspace: global
30+
; CHECK-NEXT: access_type: readwrite
31+
; CHECK: - arg_type: arg_byvalue
32+
; CHECK-NEXT: offset: 16
33+
; CHECK-NEXT: size: 1
34+
; CHECK-NEXT: arg_index: 0
35+
; CHECK-NEXT: source_offset: 0
36+
; CHECK: - arg_type: arg_byvalue
37+
; CHECK-NEXT: offset: 24
38+
; CHECK-NEXT: size: 8
39+
; CHECK-NEXT: arg_index: 0
40+
; CHECK-NEXT: source_offset: 8
41+
; CHECK-NEXT: is_ptr: true
42+
; CHECK: - arg_type: arg_byvalue
43+
; CHECK-NEXT: offset: 32
44+
; CHECK-NEXT: size: 4
45+
; CHECK-NEXT: arg_index: 0
46+
; CHECK-NEXT: source_offset: 16
47+
48+
; Function Attrs: nounwind
49+
define dllexport spir_kernel void @foo(%struct.state* byval(%struct.state) "VCArgumentIOKind"="0" %_arg_, i32 addrspace(1)* "VCArgumentIOKind"="0" %_arg_1) #1 {
50+
entry:
51+
%0 = getelementptr inbounds %struct.state, %struct.state* %_arg_, i64 0, i32 1
52+
%1 = load i8 addrspace(1)*, i8 addrspace(1)** %0, align 8
53+
%2 = ptrtoint i8 addrspace(1)* %1 to i64
54+
%3 = tail call <8 x i64> @llvm.vc.internal.lsc.load.ugm.v8i64.i1.v2i8.i64(i1 true, i8 3, i8 4, i8 5, <2 x i8> zeroinitializer, i64 0, i64 %2, i16 1, i32 0, <8 x i64> undef)
55+
tail call void @llvm.genx.vstore.v8i64.p0v8i64(<8 x i64> %3, <8 x i64>* nonnull @data)
56+
ret void
57+
}
58+
59+
declare <8 x i64> @llvm.vc.internal.lsc.load.ugm.v8i64.i1.v2i8.i64(i1, i8, i8, i8, <2 x i8>, i64, i64, i16, i32, <8 x i64>) #3
60+
61+
attributes #0 = { "VCByteOffset"="0" "VCGlobalVariable" "VCVolatile" "genx_byte_offset"="0" "genx_volatile" }
62+
attributes #1 = { noinline nounwind "VCFunction" "VCNamedBarrierCount"="0" "VCSLMSize"="0" }
63+
64+
!spirv.Source = !{!0}
65+
!opencl.spir.version = !{!1}
66+
!opencl.ocl.version = !{!2}
67+
!opencl.used.extensions = !{!3}
68+
!opencl.used.optional.core.features = !{!3}
69+
!spirv.Generator = !{!4}
70+
71+
!0 = !{i32 0, i32 100000}
72+
!1 = !{i32 1, i32 2}
73+
!2 = !{i32 1, i32 0}
74+
!3 = !{}
75+
!4 = !{i16 6, i16 14}

0 commit comments

Comments
 (0)