Skip to content

Commit e4cb0cc

Browse files
vmustyafda0
authored and committed
Fix pointer kernel argument detection in VC
The pointer kernel argument detection logic should properly handle integer arithmetic operations to avoid false positives. (cherry picked from commit 67a6ce3)
1 parent c1c3a5d commit e4cb0cc

File tree

3 files changed

+116
-1
lines changed

3 files changed

+116
-1
lines changed

IGC/AdaptorOCL/ocl_igc_shared/indirect_access_detection/version.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,4 @@ SPDX-License-Identifier: MIT
1313

1414
#pragma once
1515

16-
const uint32_t INDIRECT_ACCESS_DETECTION_VERSION = 6;
16+
const uint32_t INDIRECT_ACCESS_DETECTION_VERSION = 7;

IGC/VectorCompiler/lib/GenXCodeGen/GenXDetectPointerArg.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -414,6 +414,25 @@ void GenXDetectPointerArg::analyzeValue(Value *V) {
414414
continue;
415415
}
416416

417+
// Skip function calls and intrinsics.
418+
if (isa<CallInst>(Inst))
419+
continue;
420+
421+
switch (Inst->getOpcode()) {
422+
default:
423+
break;
424+
case Instruction::Mul:
425+
case Instruction::UDiv:
426+
case Instruction::SDiv:
427+
case Instruction::URem:
428+
case Instruction::SRem:
429+
case Instruction::Shl:
430+
case Instruction::LShr:
431+
case Instruction::AShr:
432+
// Mul-like and div-like instructions cannot produce a pointer.
433+
continue;
434+
}
435+
417436
for (auto &Op : Inst->operands())
418437
WorkList.push(Op.get());
419438
}
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2024 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
;
9+
; RUN: %opt_typed_ptrs %use_old_pass_manager% -GenXDetectPointerArg -march=genx64 -mtriple=spir64-unknown-unknown -mcpu=XeHPC -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-TYPED-PTRS
10+
; RUN: %opt_opaque_ptrs %use_old_pass_manager% -GenXDetectPointerArg -march=genx64 -mtriple=spir64-unknown-unknown -mcpu=XeHPC -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-OPAQUE-PTRS
11+
12+
@data = internal global <8 x i64> undef, align 64, !spirv.Decorations !0 #0
13+
14+
declare void @llvm.genx.vstore.v8i64.p0v8i64(<8 x i64>, <8 x i64>*) #1
15+
16+
declare i64 @llvm.genx.absi.i64(i64)
17+
18+
define dllexport spir_kernel void @kernel(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %arg5, i64 %arg6, i64 %arg7, i64 %arg8, i64 %arg9, i64 %impl.arg.private.base) local_unnamed_addr #2 {
19+
%mul = mul i64 %arg1, 8
20+
%x.0 = add i64 %mul, %arg0
21+
22+
%udiv = udiv i64 %arg2, 8
23+
%x.1 = add i64 %udiv, %x.0
24+
25+
%sdiv = sdiv i64 %arg3, 8
26+
%x.2 = add i64 %sdiv, %x.1
27+
28+
%urem = urem i64 %arg4, 8
29+
%x.3 = add i64 %urem, %x.2
30+
31+
%srem = srem i64 %arg5, 8
32+
%x.4 = add i64 %srem, %x.3
33+
34+
%shl = shl i64 %arg6, 3
35+
%x.5 = add i64 %shl, %x.4
36+
37+
%lshr = lshr i64 %arg7, 3
38+
%x.6 = add i64 %lshr, %x.5
39+
40+
%ashr = ashr i64 %arg8, 3
41+
%x.7 = add i64 %ashr, %x.6
42+
43+
%abs = call i64 @llvm.genx.absi.i64(i64 %arg9)
44+
%x.8 = add i64 %abs, %x.7
45+
46+
%load = tail call <8 x i64> @llvm.vc.internal.lsc.load.ugm.v8i64.i1.v2i8.i64(i1 true, i8 3, i8 4, i8 5, <2 x i8> zeroinitializer, i64 0, i64 %x.8, i16 1, i32 0, <8 x i64> undef)
47+
tail call void @llvm.genx.vstore.v8i64.p0v8i64(<8 x i64> %load, <8 x i64>* nonnull @data)
48+
ret void
49+
}
50+
51+
declare <8 x i64> @llvm.vc.internal.lsc.load.ugm.v8i64.i1.v2i8.i64(i1, i8, i8, i8, <2 x i8>, i64, i64, i16, i32, <8 x i64>) #3
52+
53+
attributes #0 = { "VCByteOffset"="0" "VCGlobalVariable" "VCVolatile" "genx_byte_offset"="0" "genx_volatile" }
54+
attributes #1 = { nounwind "target-cpu"="XeHPC" }
55+
attributes #2 = { noinline nounwind "CMGenxMain" "VC.Stack.Amount"="0" "oclrt"="1" "target-cpu"="XeHPC" }
56+
attributes #3 = { nofree nounwind readonly "target-cpu"="XeHPC" }
57+
58+
!spirv.MemoryModel = !{!5}
59+
!opencl.enable.FP_CONTRACT = !{}
60+
!spirv.Source = !{!6}
61+
!opencl.spir.version = !{!7, !8, !8, !8, !8, !8, !8, !8, !8, !8, !8, !8, !8, !8, !8, !8, !8, !8}
62+
!opencl.ocl.version = !{!6, !8, !8, !8, !8, !8, !8, !8, !8, !8, !8, !8, !8, !8, !8, !8, !8, !8}
63+
!opencl.used.extensions = !{!9}
64+
!opencl.used.optional.core.features = !{!9}
65+
!spirv.Generator = !{!10}
66+
!genx.kernels = !{!11}
67+
!genx.kernel.internal = !{!16}
68+
!llvm.ident = !{!19, !19, !19, !19, !19, !19, !19, !19, !19, !19, !19, !19, !19, !19, !19, !19, !19}
69+
!llvm.module.flags = !{!20}
70+
71+
; CHECK: !genx.kernels = !{![[KERNEL:[0-9]+]]}
72+
; CHECK-TYPED-PTRS: ![[KERNEL]] = !{void (i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64)* @kernel, !"kernel", !{{[0-9]+}}, i32 0, !{{[0-9]+}}, !{{[0-9]+}}, ![[NODE:[0-9]+]], i32 0}
73+
; CHECK-OPAQUE-PTRS: ![[KERNEL]] = !{ptr @kernel, !"kernel", !{{[0-9]+}}, i32 0, !{{[0-9]+}}, !{{[0-9]+}}, ![[NODE:[0-9]+]], i32 0}
74+
; CHECK: ![[NODE]] = !{!"svmptr_t", !"", !"", !"", !"", !"", !"", !"", !"", !"", !""}
75+
76+
!0 = !{!1, !2, !3, !4}
77+
!1 = !{i32 21}
78+
!2 = !{i32 44, i32 64}
79+
!3 = !{i32 5624}
80+
!4 = !{i32 5628, i32 0}
81+
!5 = !{i32 2, i32 2}
82+
!6 = !{i32 0, i32 0}
83+
!7 = !{i32 1, i32 2}
84+
!8 = !{i32 2, i32 0}
85+
!9 = !{}
86+
!10 = !{i16 6, i16 14}
87+
!11 = !{void (i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64)* @kernel, !"kernel", !12, i32 0, !13, !14, !15, i32 0}
88+
!12 = !{i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 96}
89+
!13 = !{i32 136, i32 144, i32 152, i32 160, i32 168, i32 176, i32 184, i32 192, i32 200, i32 208, i32 128}
90+
!14 = !{i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0}
91+
!15 = !{!"", !"", !"", !"", !"", !"", !"", !"", !"", !""}
92+
!16 = !{void (i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64)* @kernel, !14, !17, !9, !18}
93+
!17 = !{i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10}
94+
!18 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 255}
95+
!19 = !{!"clang version 14.0.5"}
96+
!20 = !{i32 1, !"wchar_size", i32 4}

0 commit comments

Comments
 (0)