Skip to content

Commit

Permalink
[CSI] Fix instrumentation around sync and sync.unwind instructions. F…
Browse files Browse the repository at this point in the history
…ix promotion of calls to invokes when potentially-throwing calls are inside tasks with unwind destinations.
  • Loading branch information
neboat committed Feb 3, 2025
1 parent e1dfee5 commit d752b94
Show file tree
Hide file tree
Showing 9 changed files with 995 additions and 49 deletions.
37 changes: 37 additions & 0 deletions clang/test/Cilk/cilk-mixed-unwind-codegen.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Check that Clang may generate functions calls that can throw with or without
// a landingpad in the same Cilk scope.
//
// RUN: %clang_cc1 -fopencilk -fcxx-exceptions -fexceptions -ftapir=none -triple x86_64-unknown-linux-gnu -std=c++11 -emit-llvm %s -o - | FileCheck %s
// expected-no-diagnostics

int bar(int n);
void foo(int n) {
cilk_for (int i = 0; i < n; ++i) {
int w = bar(i);
throw bar(w);
}
}

// CHECK-LABEL: define {{.*}}void @_Z3fooi(i32 {{.*}}%n)

// Check for detach with an unwind destination
// CHECK: detach within %[[SYNCREG:.+]], label %[[PFOR_BODY_ENTRY:.+]], label %[[PFOR_INC:.+]] unwind label %[[DETACH_LPAD:.+]]

// CHECK: [[PFOR_BODY_ENTRY]]:

// Check for call to function bar that might throw.
// CHECK: call {{.*}}i32 @_Z3bari(i32

// Check for invoke of function bar
// CHECK: invoke noundef i32 @_Z3bari(i32
// CHECK-NEXT: to label %[[INVOKE_CONT:.+]] unwind label %[[TASK_LPAD:.+]]

// CHECK: [[INVOKE_CONT]]:
// CHECK: call void @__cxa_throw(ptr
// CHECK-NEXT: unreachable

// CHECK: [[TASK_LPAD]]:
// CHECK-NEXT: landingpad
// CHECK-NEXT: cleanup
// CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[SYNCREG]], { ptr, i32 } %{{.*}})
// CHECK-NEXT: to label %[[UNREACHABLE:.+]] unwind label %[[DETACH_LPAD]]
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,14 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
Expand Down Expand Up @@ -1138,10 +1140,19 @@ bool CSIImpl::instrumentMemIntrinsic(Instruction *I) {
}

void CSIImpl::instrumentBasicBlock(BasicBlock &BB, const TaskInfo &TI) {
IRBuilder<> IRB(&*BB.getFirstInsertionPt());
Instruction *InsertPt = &*BB.getFirstInsertionPt();
bool IsEntry = isEntryBlock(BB, TI);
if (IsEntry)
IRB.SetInsertPoint(getEntryBBInsertPt(BB));
InsertPt = getEntryBBInsertPt(BB);
// Skip any sync.unwind intrinsics, which need to remain paired with
// corresponding syncs.
if (isSyncUnwind(InsertPt))
InsertPt = InsertPt->getNextNode();
// Skip any taskframe.end intrinsics, to keep the basic-block instrumentation
// in the same basic block.
if (isTapirIntrinsic(Intrinsic::taskframe_end, InsertPt))
InsertPt = InsertPt->getNextNode();
IRBuilder<> IRB(InsertPt);
uint64_t LocalId = BasicBlockFED.add(BB);
uint64_t BBSizeId = BBSize.add(BB, GetTTI ?
&(*GetTTI)(*BB.getParent()) : nullptr);
Expand Down Expand Up @@ -1235,8 +1246,24 @@ void CSIImpl::instrumentLoop(Loop &L, TaskInfo &TI, ScalarEvolution *SE) {
insertHookCall(&*IRB.GetInsertPoint(), CsiLoopBodyEntry, {LoopCsiId,
LoopPropVal});

SmallPtrSet<BasicBlock *, 4> ExitingBlocksVisited;
// Insert hooks at the ends of the exiting blocks.
for (BasicBlock *BB : ExitingBlocks) {
while (!ExitingBlocks.empty()) {
BasicBlock *BB = ExitingBlocks.pop_back_val();
if (!ExitingBlocksVisited.insert(BB).second)
continue;
if (isSyncUnwind(BB->getTerminator())) {
// Insert the loopbody_exit hook before the sync instruction, rather than
// the sync.unwind.
// TODO: I don't think there's anything preventing a sync.unwind from
// having multiple sync-instruction predecessors, so all such predecessors
// need to be addressed. This logic should become simpler if sync itself
// is modified to have an unwind destination.
for (BasicBlock *Pred : predecessors(BB))
ExitingBlocks.push_back(Pred);
continue;
}

// Record properties of this loop exit
CsiLoopExitProperty LoopExitProp;
LoopExitProp.setIsLatch(L.isLoopLatch(BB));
Expand Down Expand Up @@ -1806,13 +1833,16 @@ CallInst *CSIImpl::insertHookCallInSuccessorBB(BasicBlock *Succ, BasicBlock *BB,
ArrayRef<Value *> HookArgs,
ArrayRef<Value *> DefaultArgs) {
assert(HookFunction && "No hook function given.");
Instruction *InsertPt = &*Succ->getFirstInsertionPt();
if (isSyncUnwind(InsertPt))
InsertPt = InsertPt->getNextNode();

// If this successor block has a unique predecessor, just insert the hook call
// as normal.
if (Succ->getUniquePredecessor()) {
assert(Succ->getUniquePredecessor() == BB &&
"BB is not unique predecessor of successor block");
return insertHookCall(&*Succ->getFirstInsertionPt(), HookFunction,
HookArgs);
return insertHookCall(InsertPt, HookFunction, HookArgs);
}

if (updateArgPHIs(Succ, BB, HookFunction, HookArgs, DefaultArgs))
Expand All @@ -1823,7 +1853,7 @@ CallInst *CSIImpl::insertHookCallInSuccessorBB(BasicBlock *Succ, BasicBlock *BB,
for (PHINode *ArgPHI : ArgPHIs[Key])
SuccessorHookArgs.push_back(ArgPHI);

IRBuilder<> IRB(&*Succ->getFirstInsertionPt());
IRBuilder<> IRB(InsertPt);
// Insert the hook call, using the PHI as the CSI ID.
CallInst *Call = IRB.CreateCall(HookFunction, SuccessorHookArgs);
setInstrumentationDebugLoc(*Succ, (Instruction *)Call);
Expand Down Expand Up @@ -2747,6 +2777,11 @@ void CSIImpl::instrumentFunction(Function &F) {
for (BasicBlock *BB : BasicBlocks)
instrumentBasicBlock(*BB, TI);

if (Options.InstrumentLoops)
// Recursively instrument all loops
for (Loop *L : LI)
instrumentLoop(*L, TI, SE);

// Instrument Tapir constructs.
if (Options.InstrumentTapir) {
if (Config->DoesFunctionRequireInstrumentationForPoint(
Expand All @@ -2768,11 +2803,6 @@ void CSIImpl::instrumentFunction(Function &F) {
for (Instruction *I : Allocas)
instrumentAlloca(I, TI);

if (Options.InstrumentLoops)
// Recursively instrument all loops
for (Loop *L : LI)
instrumentLoop(*L, TI, SE);

// Do this work in a separate loop after copying the iterators so that we
// aren't modifying the list as we're iterating.
if (Options.InstrumentMemoryAccesses)
Expand Down
59 changes: 24 additions & 35 deletions llvm/lib/Transforms/Utils/TapirUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/TapirUtils.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/LoopInfo.h"
Expand Down Expand Up @@ -85,16 +86,13 @@ bool llvm::isSkippableTapirIntrinsic(const Instruction *I) {
/// Returns true if the given basic block \p B is a placeholder successor of a
/// taskframe.resume or detached.rethrow.
bool llvm::isTapirPlaceholderSuccessor(const BasicBlock *B) {
for (const BasicBlock *Pred : predecessors(B)) {
return llvm::any_of(predecessors(B), [&](const BasicBlock *Pred) {
if (!isDetachedRethrow(Pred->getTerminator()) &&
!isTaskFrameResume(Pred->getTerminator()))
return false;

const InvokeInst *II = dyn_cast<InvokeInst>(Pred->getTerminator());
if (B != II->getNormalDest())
return false;
}
return true;
return B == II->getNormalDest();
});
}

/// Returns a taskframe.resume that uses the given taskframe, or nullptr if no
Expand Down Expand Up @@ -2166,37 +2164,28 @@ static void promoteCallsInTasksHelper(
// spawned task recursively.
if (DetachInst *DI = dyn_cast<DetachInst>(BB->getTerminator())) {
Processed.insert(BB);
if (!DI->hasUnwindDest()) {
// Create an unwind edge for the subtask, which is terminated with a
// detached-rethrow.
BasicBlock *SubTaskUnwindEdge = CreateSubTaskUnwindEdge(
Intrinsic::detached_rethrow, DI->getSyncRegion(), UnwindEdge,
Unreachable, DI);
// Recursively check all blocks in the detached task.
promoteCallsInTasksHelper(DI->getDetached(), SubTaskUnwindEdge,
Unreachable, CurrentTaskFrame, &Worklist,
Processed, IgnoreFunctionCheck);
// If the new unwind edge is not used, remove it.
if (pred_empty(SubTaskUnwindEdge))
SubTaskUnwindEdge->eraseFromParent();
else
DetachesToReplace.push_back(DI);

} else {
// Because this detach has an unwind destination, any calls in the
// spawned task that may throw should already be invokes. Hence there
// is no need to promote calls in this task.
if (IgnoreFunctionCheck) {
// This recursive call should only apply IgnoreFunctionCheck to callsites.
promoteCallsInTasksHelper(DI->getDetached(), DI->getUnwindDest(),
Unreachable, CurrentTaskFrame, &Worklist,
Processed, IgnoreFunctionCheck);
}
// Create an unwind edge for the subtask, which is terminated with a
// detached-rethrow.
BasicBlock *SubTaskUnwindEdge = CreateSubTaskUnwindEdge(
Intrinsic::detached_rethrow, DI->getSyncRegion(),
DI->hasUnwindDest() ? DI->getUnwindDest() : UnwindEdge, Unreachable,
DI);
// Recursively check all blocks in the detached task.
promoteCallsInTasksHelper(DI->getDetached(), SubTaskUnwindEdge,
Unreachable, CurrentTaskFrame, &Worklist,
Processed, IgnoreFunctionCheck);

// If the new unwind edge is not used, remove it.
if (pred_empty(SubTaskUnwindEdge))
SubTaskUnwindEdge->eraseFromParent();
else if (!DI->hasUnwindDest())
DetachesToReplace.push_back(DI);

if (DI->hasUnwindDest() && Visited.insert(DI->getUnwindDest()).second)
// If the detach-unwind isn't dead, add it to the worklist.
Worklist.push_back(DI->getUnwindDest());

if (Visited.insert(DI->getUnwindDest()).second)
// If the detach-unwind isn't dead, add it to the worklist.
Worklist.push_back(DI->getUnwindDest());
}
// Add the continuation to the worklist.
if (isTaskFrameResume(UnwindEdge->getTerminator()) &&
(CurrentTaskFrame == getTaskFrameUsed(DI->getDetached()))) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
; Check that CSI does not insert instrumentation between a sync and its corresponding sync.unwind.
;
; RUN: opt < %s -passes="csi-setup,csi" -csi-instrument-basic-blocks=false -S | FileCheck %s --check-prefixes=CHECK
; RUN: opt < %s -passes="csi-setup,csi" -S | FileCheck %s --check-prefixes=CHECK,CHECK-BB
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Function Attrs: nounwind willreturn memory(argmem: readwrite)
declare token @llvm.syncregion.start() #0

; Function Attrs: nounwind willreturn memory(argmem: readwrite)
declare token @llvm.taskframe.create() #0

; Function Attrs: willreturn memory(argmem: readwrite)
declare void @llvm.sync.unwind(token) #1

; Function Attrs: nounwind willreturn memory(argmem: readwrite)
declare void @llvm.taskframe.end(token) #0

define fastcc void @_Z28prove_sumcheck_cubic_batchedR16ProverTranscriptRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE14GoldilockField8FixedVecIP9DensePolyESD_RSB_SD_SD_SD_St4spanIKS9_Lm18446744073709551615EE.outline_pfor.cond522.ls2() personality ptr null {
pfor.cond522.preheader.ls2:
%syncreg529.ls2 = tail call token @llvm.syncregion.start()
br label %pfor.body.entry525.tf.tf.tf.tf.tf.tf.tf.tf.ls2

pfor.body.entry525.tf.tf.tf.tf.tf.tf.tf.tf.ls2: ; preds = %sync.continue578.ls2, %pfor.cond522.preheader.ls2
%0 = tail call token @llvm.taskframe.create()
detach within %syncreg529.ls2, label %det.achd554.ls2, label %det.cont569.ls2

det.cont569.ls2: ; preds = %det.achd554.ls2, %pfor.body.entry525.tf.tf.tf.tf.tf.tf.tf.tf.ls2
sync within %syncreg529.ls2, label %sync.continue578.ls2

sync.continue578.ls2: ; preds = %det.cont569.ls2
tail call void @llvm.sync.unwind(token %syncreg529.ls2) #2
tail call void @llvm.taskframe.end(token %0)
br i1 false, label %pfor.cond.cleanup599.ls2.tfend, label %pfor.body.entry525.tf.tf.tf.tf.tf.tf.tf.tf.ls2

det.achd554.ls2: ; preds = %pfor.body.entry525.tf.tf.tf.tf.tf.tf.tf.tf.ls2
reattach within %syncreg529.ls2, label %det.cont569.ls2

pfor.cond.cleanup599.ls2.tfend: ; preds = %sync.continue578.ls2
ret void
}

; CHECK: define {{.*}}void @_Z28prove_sumcheck_cubic_batchedR16ProverTranscriptRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE14GoldilockField8FixedVecIP9DensePolyESD_RSB_SD_SD_SD_St4spanIKS9_Lm18446744073709551615EE.outline_pfor.cond522.ls2()

; CHECK: %syncreg529.ls2 = {{.*}}call token @llvm.syncregion.start()
; CHECK: %[[TF:.+]] = {{.*}}call token @llvm.taskframe.create()

; CHECK: sync within %syncreg529.ls2, label %[[SYNC_CONT:.+]]

; CHECK: [[SYNC_CONT]]:
; CHECK-NOT: call void @__csi_
; CHECK-NEXT: void @llvm.sync.unwind(token %syncreg529.ls2
; CHECK: call void @__csi_after_sync(
; CHECK-BB-NOT: @__csi_bb_
; CHECK: call void @llvm.taskframe.end(token %[[TF]])

; CHECK-BB: call void @__csi_bb_entry(

; CHECK: call void @__csi_loopbody_exit(

; CHECK: reattach within %syncreg529.ls2

; uselistorder directives
uselistorder ptr null, { 1, 2, 0 }

attributes #0 = { nounwind willreturn memory(argmem: readwrite) }
attributes #1 = { willreturn memory(argmem: readwrite) }
attributes #2 = { nounwind }
Loading

0 comments on commit d752b94

Please sign in to comment.