Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[NVPTX] Do not run the NVVMReflect pass as part of the normal pipeline #121834

Merged
merged 3 commits into from
Jan 7, 2025
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,10 @@ void NVPTXSubtarget::anchor() {}
NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU,
StringRef FS) {
// Provide the default CPU if we don't have one.
TargetName = std::string(CPU.empty() ? "sm_30" : CPU);
TargetName = std::string(CPU);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It could use a comment on why we may want to keep CPU empty in some cases.


ParseSubtargetFeatures(TargetName, /*TuneCPU*/ TargetName, FS);
ParseSubtargetFeatures(CPU.empty() ? "sm_30" : CPU,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

CPU.empty() ? "sm_30" : CPU -> getTargetName()

/*TuneCPU=*/CPU.empty() ? "sm_30" : CPU, FS);

// Re-map SM version numbers, SmVersion carries the regular SMs which do
// have relative order, while FullSmVersion allows distinguishing sm_90 from
Expand Down
5 changes: 4 additions & 1 deletion llvm/lib/Target/NVPTX/NVPTXSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,10 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
// - 0 represents base GPU model,
// - non-zero value identifies particular architecture-accelerated variant.
bool hasAAFeatures() const { return getFullSmVersion() % 10; }
std::string getTargetName() const { return TargetName; }
std::string getTargetName() const {
return TargetName.empty() ? "sm_30" : TargetName;
}
bool hasTargetName() const { return !TargetName.empty(); }

// Get maximum value of required alignments among the supported data types.
// From the PTX ISA doc, section 8.2.3:
Expand Down
5 changes: 4 additions & 1 deletion llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,10 @@ void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
PB.registerPipelineStartEPCallback(
[this](ModulePassManager &PM, OptimizationLevel Level) {
FunctionPassManager FPM;
FPM.addPass(NVVMReflectPass(Subtarget.getSmVersion()));
jhuber6 marked this conversation as resolved.
Show resolved Hide resolved
// We do not want to fold out calls to nvvm.reflect early if the user
// has not provided a target architecture just yet.
if (Subtarget.hasTargetName())
FPM.addPass(NVVMReflectPass(Subtarget.getSmVersion()));
// Note: NVVMIntrRangePass was causing numerical discrepancies at one
// point, if issues crop up, consider disabling.
FPM.addPass(NVVMIntrRangePass());
Expand Down
8 changes: 7 additions & 1 deletion llvm/lib/Target/NVPTX/NVVMReflect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "NVPTX.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/CodeGen/CommandFlags.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
Expand Down Expand Up @@ -219,7 +220,12 @@ bool NVVMReflect::runOnFunction(Function &F) {
return runNVVMReflect(F, SmVersion);
}

NVVMReflectPass::NVVMReflectPass() : NVVMReflectPass(0) {}
NVVMReflectPass::NVVMReflectPass() {
// Get the CPU string from the command line if not provided.
StringRef SM = codegen::getMCPU();
if (!SM.consume_front("sm_") || SM.consumeInteger(10, SmVersion))
SmVersion = 0;
}

PreservedAnalyses NVVMReflectPass::run(Function &F,
FunctionAnalysisManager &AM) {
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
; Libdevice in recent CUDA versions relies on __CUDA_ARCH reflecting GPU type.
; Verify that __nvvm_reflect() is replaced with an appropriate value.
;
; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 -mcpu=sm_20 \
; RUN: opt %s -S -passes='nvvm-reflect' -mtriple=nvptx64 -mcpu=sm_20 \
; RUN: | FileCheck %s --check-prefixes=COMMON,SM20
; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 -mcpu=sm_35 \
; RUN: opt %s -S -passes='nvvm-reflect' -mtriple=nvptx64 -mcpu=sm_35 \
; RUN: | FileCheck %s --check-prefixes=COMMON,SM35

@"$str" = private addrspace(1) constant [12 x i8] c"__CUDA_ARCH\00"
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/NVPTX/nvvm-reflect-ocl.ll
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
; Verify that __nvvm_reflect_ocl() is replaced with an appropriate value
;
; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 -mcpu=sm_20 \
; RUN: opt %s -S -passes='nvvm-reflect' -mtriple=nvptx64 -mcpu=sm_20 \
; RUN: | FileCheck %s --check-prefixes=COMMON,SM20
; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 -mcpu=sm_35 \
; RUN: opt %s -S -passes='nvvm-reflect' -mtriple=nvptx64 -mcpu=sm_35 \
; RUN: | FileCheck %s --check-prefixes=COMMON,SM35

@"$str" = private addrspace(4) constant [12 x i8] c"__CUDA_ARCH\00"
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/NVPTX/nvvm-reflect-opaque.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@

; RUN: cat %s > %t.noftz
; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 0}' >> %t.noftz
; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='default<O2>' \
; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect,simplifycfg' \
; RUN: | FileCheck %s --check-prefix=USE_FTZ_0 --check-prefix=CHECK

; RUN: cat %s > %t.ftz
; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}' >> %t.ftz
; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='default<O2>' \
; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect,simplifycfg' \
; RUN: | FileCheck %s --check-prefix=USE_FTZ_1 --check-prefix=CHECK

@str = private unnamed_addr addrspace(4) constant [11 x i8] c"__CUDA_FTZ\00"
Expand Down Expand Up @@ -43,7 +43,7 @@ exit:

declare i32 @llvm.nvvm.reflect(ptr)

; CHECK-LABEL: define noundef i32 @intrinsic
; CHECK-LABEL: define i32 @intrinsic
define i32 @intrinsic() {
; CHECK-NOT: call i32 @llvm.nvvm.reflect
; USE_FTZ_0: ret i32 0
Expand Down
7 changes: 4 additions & 3 deletions llvm/test/CodeGen/NVPTX/nvvm-reflect.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@

; RUN: cat %s > %t.noftz
; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 0}' >> %t.noftz
; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='default<O2>' \
; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect,simplifycfg' \
; RUN: | FileCheck %s --check-prefix=USE_FTZ_0 --check-prefix=CHECK

; RUN: cat %s > %t.ftz
; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}' >> %t.ftz
; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='default<O2>' \
; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect,simplifycfg' \
; RUN: | FileCheck %s --check-prefix=USE_FTZ_1 --check-prefix=CHECK

@str = private unnamed_addr addrspace(4) constant [11 x i8] c"__CUDA_FTZ\00"
Expand Down Expand Up @@ -43,7 +43,8 @@ exit:

declare i32 @llvm.nvvm.reflect(ptr)

; CHECK-LABEL: define noundef i32 @intrinsic
; CHECK-LABEL: define i32 @intrinsic

define i32 @intrinsic() {
; CHECK-NOT: call i32 @llvm.nvvm.reflect
; USE_FTZ_0: ret i32 0
Expand Down
Loading