Skip to content

Commit

Permalink
[NVPTX] Do not run the NVVMReflect pass as part of the normal pipeline
Browse files Browse the repository at this point in the history
Summary:
This pass lowers the `__nvvm_reflect` builtin in the IR. However, the pass
currently runs in the standard optimization pipeline, not just the
backend pipeline. This means that if the user creates LLVM-IR without an
architecture set, the pass will always delete the reflect code, even if
that code is intended to be used later.

Moving the pass into the backend pipeline ensures that it works as
intended, allowing users to conditionally include code depending on
which target architecture they end up using. This fixes a bug in
OpenMP and missing code in `libc`.
  • Loading branch information
jhuber6 committed Jan 6, 2025
1 parent f4bab06 commit f616b66
Show file tree
Hide file tree
Showing 6 changed files with 18 additions and 12 deletions.
1 change: 0 additions & 1 deletion llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,6 @@ void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
PB.registerPipelineStartEPCallback(
[this](ModulePassManager &PM, OptimizationLevel Level) {
FunctionPassManager FPM;
FPM.addPass(NVVMReflectPass(Subtarget.getSmVersion()));
// Note: NVVMIntrRangePass was causing numerical discrepancies at one
// point, if issues crop up, consider disabling.
FPM.addPass(NVVMIntrRangePass());
Expand Down
8 changes: 7 additions & 1 deletion llvm/lib/Target/NVPTX/NVVMReflect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "NVPTX.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/CodeGen/CommandFlags.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
Expand Down Expand Up @@ -219,7 +220,12 @@ bool NVVMReflect::runOnFunction(Function &F) {
return runNVVMReflect(F, SmVersion);
}

NVVMReflectPass::NVVMReflectPass() : NVVMReflectPass(0) {}
// Default constructor: no SM version is passed in, so SmVersion is derived
// from the CPU string reported by codegen::getMCPU().
NVVMReflectPass::NVVMReflectPass() {
// Get the CPU string from the command line if not provided.
// NOTE(review): presumably this is the value given via -mcpu (e.g. "sm_35");
// confirm against llvm/CodeGen/CommandFlags.h.
StringRef SM = codegen::getMCPU();
// Strip the leading "sm_" and parse the decimal number that follows into
// SmVersion. If the prefix is absent, or consumeInteger reports a failure
// (a true return, per the StringRef API), fall back to SmVersion = 0.
if (!SM.consume_front("sm_") || SM.consumeInteger(10, SmVersion))
SmVersion = 0;
}

PreservedAnalyses NVVMReflectPass::run(Function &F,
FunctionAnalysisManager &AM) {
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
; Libdevice in recent CUDA versions relies on __CUDA_ARCH reflecting GPU type.
; Verify that __nvvm_reflect() is replaced with an appropriate value.
;
; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 -mcpu=sm_20 \
; RUN: opt %s -S -passes='nvvm-reflect' -mtriple=nvptx64 -mcpu=sm_20 \
; RUN: | FileCheck %s --check-prefixes=COMMON,SM20
; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 -mcpu=sm_35 \
; RUN: opt %s -S -passes='nvvm-reflect' -mtriple=nvptx64 -mcpu=sm_35 \
; RUN: | FileCheck %s --check-prefixes=COMMON,SM35

@"$str" = private addrspace(1) constant [12 x i8] c"__CUDA_ARCH\00"
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/NVPTX/nvvm-reflect-ocl.ll
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
; Verify that __nvvm_reflect_ocl() is replaced with an appropriate value
;
; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 -mcpu=sm_20 \
; RUN: opt %s -S -passes='nvvm-reflect' -mtriple=nvptx64 -mcpu=sm_20 \
; RUN: | FileCheck %s --check-prefixes=COMMON,SM20
; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 -mcpu=sm_35 \
; RUN: opt %s -S -passes='nvvm-reflect' -mtriple=nvptx64 -mcpu=sm_35 \
; RUN: | FileCheck %s --check-prefixes=COMMON,SM35

@"$str" = private addrspace(4) constant [12 x i8] c"__CUDA_ARCH\00"
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/NVPTX/nvvm-reflect-opaque.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@

; RUN: cat %s > %t.noftz
; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 0}' >> %t.noftz
; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='default<O2>' \
; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect,simplifycfg' \
; RUN: | FileCheck %s --check-prefix=USE_FTZ_0 --check-prefix=CHECK

; RUN: cat %s > %t.ftz
; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}' >> %t.ftz
; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='default<O2>' \
; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect,simplifycfg' \
; RUN: | FileCheck %s --check-prefix=USE_FTZ_1 --check-prefix=CHECK

@str = private unnamed_addr addrspace(4) constant [11 x i8] c"__CUDA_FTZ\00"
Expand Down Expand Up @@ -43,7 +43,7 @@ exit:

declare i32 @llvm.nvvm.reflect(ptr)

; CHECK-LABEL: define noundef i32 @intrinsic
; CHECK-LABEL: define i32 @intrinsic
define i32 @intrinsic() {
; CHECK-NOT: call i32 @llvm.nvvm.reflect
; USE_FTZ_0: ret i32 0
Expand Down
7 changes: 4 additions & 3 deletions llvm/test/CodeGen/NVPTX/nvvm-reflect.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@

; RUN: cat %s > %t.noftz
; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 0}' >> %t.noftz
; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='default<O2>' \
; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect,simplifycfg' \
; RUN: | FileCheck %s --check-prefix=USE_FTZ_0 --check-prefix=CHECK

; RUN: cat %s > %t.ftz
; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}' >> %t.ftz
; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='default<O2>' \
; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect,simplifycfg' \
; RUN: | FileCheck %s --check-prefix=USE_FTZ_1 --check-prefix=CHECK

@str = private unnamed_addr addrspace(4) constant [11 x i8] c"__CUDA_FTZ\00"
Expand Down Expand Up @@ -43,7 +43,8 @@ exit:

declare i32 @llvm.nvvm.reflect(ptr)

; CHECK-LABEL: define noundef i32 @intrinsic
; CHECK-LABEL: define i32 @intrinsic

define i32 @intrinsic() {
; CHECK-NOT: call i32 @llvm.nvvm.reflect
; USE_FTZ_0: ret i32 0
Expand Down

0 comments on commit f616b66

Please sign in to comment.