Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Utility thread and further threading support #279

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
1 change: 1 addition & 0 deletions llvm/include/llvm/Cheerp/NameGenerator.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ class NameGenerator
ATOMICXCHG,
ATOMICCMPXCHG,
THREADINGOBJECT,
BLOBNAME,
MEMORY,
HEAP8,
HEAP16,
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/Cheerp/PassRegistry.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
#include "llvm/Cheerp/CallConstructors.h"
#include "llvm/Cheerp/CommandLine.h"
#include "llvm/Cheerp/CheerpLowerAtomic.h"
#include "llvm/Cheerp/ThreadLocalLowering.h"

namespace cheerp {

Expand Down
40 changes: 40 additions & 0 deletions llvm/include/llvm/Cheerp/ThreadLocalLowering.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
//===-- ThreadLocalLowering.h - Cheerp helper -------------------------===//
//
// Cheerp: The C++ compiler for the Web
//
// This file is distributed under the Apache License v2.0 with LLVM Exceptions.
// See LICENSE.TXT for details.
//
// Copyright 2024-2025 Leaning Technologies
//
//===----------------------------------------------------------------------===//

#ifndef CHEERP_THREAD_LOCAL_LOWERING_H
#define CHEERP_THREAD_LOCAL_LOWERING_H

#include "llvm/IR/PassManager.h"
#include "llvm/Cheerp/GlobalDepsAnalyzer.h"

namespace cheerp{

using namespace llvm;

/// Function-level half of the thread-local lowering.
///
/// Its run() method (defined in ThreadLocalLowering.cpp) replaces calls to the
/// llvm.threadlocal.address intrinsic with calls to a helper function, and
/// leaves functions placed in the "asmjs" section untouched.
class ThreadLocalLoweringInnerPass: public PassInfoMixin<ThreadLocalLoweringInnerPass> {
	// Handed to the lowering routine so that the generated helper function
	// can be registered as an asmjs export.
	GlobalDepsAnalyzer& GDA;
public:
	/// \param GDA analyzer that receives the helper created during lowering.
	///
	/// Marked explicit so that a GlobalDepsAnalyzer is never silently
	/// converted into a pass object.
	explicit ThreadLocalLoweringInnerPass(GlobalDepsAnalyzer& GDA) : GDA(GDA)
	{
	}
	/// Lowers the thread-local address intrinsics found in \p F.
	/// \returns PreservedAnalyses::all() when \p F was not modified.
	PreservedAnalyses run(Function& F, FunctionAnalysisManager& FAM);
	// New pass manager hook: a required pass is not skipped by the pass manager.
	static bool isRequired() { return true;}
};

/// Module-level entry point of the thread-local lowering.
///
/// Its run() method (defined in ThreadLocalLowering.cpp) fetches the
/// GlobalDepsAnalysis result for the module and applies
/// ThreadLocalLoweringInnerPass to the module's functions.
class ThreadLocalLoweringPass
	: public PassInfoMixin<ThreadLocalLoweringPass>
{
public:
	/// Runs the lowering over \p M and reports which analyses are preserved.
	PreservedAnalyses run(Module& M, ModuleAnalysisManager& MAM);

	// New pass manager hook: a required pass is not skipped by the pass manager.
	static bool isRequired()
	{
		return true;
	}
};

}

#endif
3 changes: 3 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsCheerp.td
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,9 @@ def int_cheerp_get_thread_pointer : Intrinsic<[llvm_i32_ty]>;
def int_cheerp_set_thread_pointer : Intrinsic<[],
[llvm_i32_ty]>;

// Yields, as an i32, the offset associated with a thread-local variable.
// It is overloaded on the type of its single pointer argument.
// ThreadLocalLowering emits a call to this intrinsic with the operand of
// llvm.threadlocal.address, then passes the result to __getThreadLocalAddress,
// which adds it to the value of cheerp_get_thread_pointer.
// NOTE(review): the offset is presumably relative to the start of the
// thread-local image -- confirm against the backend that expands it.
def int_cheerp_get_threadlocal_offset : Intrinsic<[llvm_i32_ty],
[llvm_anyptr_ty]>;

def int_cheerp_atomic_wait : Intrinsic<[llvm_i32_ty],
[llvm_ptr_ty, llvm_i32_ty, llvm_i64_ty]>,
ClangBuiltin<"__builtin_cheerp_atomic_wait">;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/CheerpUtils/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ add_llvm_component_library(LLVMCheerpUtils
FinalizeMemoryInfo.cpp
CheerpLowerAtomic.cpp
JsExport.cpp
ThreadLocalLowering.cpp
)

add_dependencies(LLVMCheerpUtils intrinsics_gen)
30 changes: 24 additions & 6 deletions llvm/lib/CheerpUtils/CallConstructors.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,17 @@ namespace cheerp
PreservedAnalyses CallConstructorsPass::run(llvm::Module &M, llvm::ModuleAnalysisManager &MPA)
{
FunctionType* Ty = FunctionType::get(Type::getVoidTy(M.getContext()), false);
Function* Ctors = cast<Function>(M.getOrInsertFunction("_start", Ty).getCallee());
if (!Ctors->empty())
Function* StartFunction = cast<Function>(M.getOrInsertFunction("_start", Ty).getCallee());
if (!StartFunction->empty())
return PreservedAnalyses::all();

BasicBlock* Entry = BasicBlock::Create(M.getContext(),"entry", Ctors);
IRBuilder<> Builder(Entry);
BasicBlock* StartEntry = BasicBlock::Create(M.getContext(),"entry", StartFunction);
IRBuilder<> Builder(StartEntry);

Function* StartPreThread = cast<Function>(M.getOrInsertFunction("_startPreThread", Ty).getCallee());
BasicBlock* StartPreThreadEntry = BasicBlock::Create(M.getContext(),"entry", StartPreThread);
if (!LowerAtomics)
Builder.SetInsertPoint(StartPreThreadEntry);

if (LinearOutput == LinearOutputTy::Wasm)
{
Expand All @@ -56,10 +61,24 @@ PreservedAnalyses CallConstructorsPass::run(llvm::Module &M, llvm::ModuleAnalysi
{
Builder.CreateCall(Ty, cast<Function>(C->getAggregateElement(1)->stripPointerCastsSafe()));
}

if (!LowerAtomics)
{
// If -pthread is passed, add a call to spawnUtility to setup the utility thread.
Function* spawnUtility = cast<Function>(M.getOrInsertFunction("spawnUtility", Ty).getCallee());
Builder.CreateCall(Ty, spawnUtility);
Builder.CreateRetVoid();
Builder.SetInsertPoint(StartEntry);
}

Function* Main = getMainFunction(M);
bool Wasi = Triple(M.getTargetTriple()).getOS() == Triple::WASI;
if (Wasi || (Main && Main->getSection() == "asmjs"))
Ctors->setSection("asmjs");
{
StartFunction->setSection("asmjs");
if (!LowerAtomics)
StartPreThread->setSection("asmjs");
}
if (Main)
{
Value* ExitCode = nullptr;
Expand Down Expand Up @@ -124,7 +143,6 @@ PreservedAnalyses CallConstructorsPass::run(llvm::Module &M, llvm::ModuleAnalysi
Builder.CreateCall(Exit->getFunctionType(), Exit, ExitCode);
}
}

Builder.CreateRetVoid();

PreservedAnalyses PA = PreservedAnalyses::none();
Expand Down
24 changes: 22 additions & 2 deletions llvm/lib/CheerpUtils/GlobalDepsAnalyzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -700,7 +700,7 @@ bool GlobalDepsAnalyzer::runOnModule( llvm::Module & module )
extendLifetime(startFunc);
if (startFunc->getSection() == "asmjs")
asmJSExportedFunctions.insert(startFunc);
else if (LinearOutput == LinearOutputTy::Wasm)
else if (LinearOutput == LinearOutputTy::Wasm && LowerAtomics)
{
// Because memory_init is empty, it will not be automatically tagged as asmjs
// when the _start function is not, but it should. So we do it manually.
Expand All @@ -714,7 +714,27 @@ bool GlobalDepsAnalyzer::runOnModule( llvm::Module & module )
llvm::errs() << "warning: _start function point not found\n";
}
entryPoint = startFunc;


// If -pthread is linked in, keep the _startPreThread function alive.
// This mirrors the handling of _start just above.
if (!LowerAtomics)
{
	llvm::Function* startPreThread = module.getFunction("_startPreThread");
	if (startPreThread)
	{
		extendLifetime(startPreThread);
		if (startPreThread->getSection() == "asmjs")
			asmJSExportedFunctions.insert(startPreThread);
		else if (LinearOutput == LinearOutputTy::Wasm)
		{
			// _startPreThread itself is not in the asmjs section, so
			// __memory_init has to be exported by hand, as is done for _start.
			llvm::Function* initFunc = module.getFunction("__memory_init");
			assert(initFunc);
			asmJSExportedFunctions.insert(initFunc);
		}
	}
	else
		// The message names the function that is actually looked up above
		// (it previously said "_startPtrThread").
		llvm::errs() << "warning: _startPreThread function point not found, and -pthread is linked\n";
}

processEnqueuedFunctions();

// Flush out all functions
Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/CheerpUtils/LinearMemoryHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,6 @@ void LinearMemoryHelper::addGlobals()

// Compute the global variable addresses.
// Also, for thread locals, calculate offsets to the image start, and the total size of the image.
threadLocalImageSize = 0;
threadLocalStart = 0;
globalsStart = 0;
for (const auto G: asmjsGlobals) {
Expand All @@ -460,10 +459,12 @@ void LinearMemoryHelper::addGlobals()
asmjsThreadLocals.push_back(G);
if (threadLocalStart == 0)
threadLocalStart = heapStart;
threadLocalImageSize += size;
}
heapStart += size;
}
heapStart = (heapStart + 7) & ~7;
threadLocalImageSize = heapStart - threadLocalStart;
// Align the thread local storage to 8 bytes.
}

void LinearMemoryHelper::generateGlobalizedGlobalsUsage()
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/CheerpUtils/PointerAnalyzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -672,6 +672,7 @@ PointerKindWrapper& PointerUsageVisitor::visitValue(PointerKindWrapper& ret, con
case Intrinsic::invariant_start:
return CacheAndReturn(visitValue(ret, intrinsic->getArgOperand(1), /*first*/ false));
case Intrinsic::stacksave:
case Intrinsic::threadlocal_address:
return CacheAndReturn(ret = PointerKindWrapper(RAW));
case Intrinsic::invariant_end:
case Intrinsic::vastart:
Expand Down
127 changes: 127 additions & 0 deletions llvm/lib/CheerpUtils/ThreadLocalLowering.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
//===-- ThreadLocalLowering.cpp - Cheerp helper -------------------------===//
//
// Cheerp: The C++ compiler for the Web
//
// This file is distributed under the Apache License v2.0 with LLVM Exceptions.
// See LICENSE.TXT for details.
//
// Copyright 2024-2025 Leaning Technologies
//
//===----------------------------------------------------------------------===//

#include "llvm/Cheerp/ThreadLocalLowering.h"
#include "llvm/Cheerp/GlobalDepsAnalyzer.h"
#include "llvm/Cheerp/LinearMemoryHelper.h"
#include "llvm/Cheerp/InvokeWrapping.h"
#include "llvm/Cheerp/Registerize.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IRBuilder.h"

using namespace llvm;

namespace cheerp
{

// Returns the module's "__getThreadLocalAddress" helper, emitting its body the
// first time it is requested.
//
// The helper has the signature i8* (i32). It adds its argument to the value of
// the cheerp_get_thread_pointer intrinsic and returns the sum as a pointer.
// When the body is created, the helper is placed in the "asmjs" section and
// registered with GDA as an asmjs export.
static Function* getOrCreateThreadLocalWrapper(Module* M, GlobalDepsAnalyzer& GDA)
{
	LLVMContext& Ctx = M->getContext();
	Type* bytePtrTy = PointerType::get(IntegerType::getInt8Ty(Ctx), 0);
	Type* params[] = {IntegerType::getInt32Ty(Ctx)};
	FunctionType* helperTy = FunctionType::get(bytePtrTy, ArrayRef<Type*>(params, 1), false);

	Function* helper = cast<Function>(M->getOrInsertFunction("__getThreadLocalAddress", helperTy).getCallee());
	// A helper that already has a body was built by an earlier call.
	if (!helper->empty())
		return helper;

	IRBuilder<> Builder(BasicBlock::Create(Ctx, "entry", helper));

	// Read the current thread pointer.
	Function* getThreadPointer = Intrinsic::getDeclaration(M, Intrinsic::cheerp_get_thread_pointer);
	Value* threadBase = Builder.CreateCall(getThreadPointer);

	// Thread pointer + offset argument, converted from integer to pointer.
	Value* sum = Builder.CreateAdd(threadBase, helper->getArg(0));
	Builder.CreateRet(Builder.CreateIntToPtr(sum, bytePtrTy));

	helper->setSection("asmjs");
	GDA.insertAsmJSExport(helper);
	return helper;
}

// Rewrites every llvm.threadlocal.address call in F into a call to the
// __getThreadLocalAddress helper.
//
// For each such call:
//   1. a cheerp_get_threadlocal_offset call is emitted on the same operand;
//   2. the offset is passed to the helper, which is created on demand;
//   3. the helper's result is bitcast if its type differs from the type of
//      the original call, and then replaces all uses of the original.
//
// Returns true when at least one call was rewritten.
bool replaceThreadLocalIntrinsicWithFunction(Function& F, GlobalDepsAnalyzer& GDA)
{
	Module* M = F.getParent();
	// Replaced calls are erased only after the walk, so the instruction
	// iteration below is never invalidated.
	SmallVector<Instruction*, 8> lowered;

	for (BasicBlock& BB: F)
	{
		for (Instruction& I: BB)
		{
			IntrinsicInst* II = dyn_cast<IntrinsicInst>(&I);
			if (!II || II->getIntrinsicID() != Intrinsic::threadlocal_address)
				continue;

			// New instructions go right before the call being replaced.
			IRBuilder<> Builder(II);
			Value* tlsVar = II->getOperand(0);

			Type* overload[] = {tlsVar->getType()};
			Function* offsetIntrinsic = Intrinsic::getDeclaration(M, Intrinsic::cheerp_get_threadlocal_offset, overload);
			Value* offset = Builder.CreateCall(offsetIntrinsic, tlsVar);

			Function* helper = getOrCreateThreadLocalWrapper(M, GDA);
			Value* replacement = Builder.CreateCall(helper, offset);

			Type* wantedTy = II->getType();
			if (replacement->getType() != wantedTy)
				replacement = Builder.CreateBitCast(replacement, wantedTy);

			II->replaceAllUsesWith(replacement);
			lowered.push_back(II);
		}
	}

	for (Instruction* dead: lowered)
		dead->eraseFromParent();
	return !lowered.empty();
}

// Lowers the llvm.threadlocal.address calls of a single function.
//
// Functions in the "asmjs" section are skipped. When nothing is rewritten all
// analyses are reported as preserved; otherwise only the analyses listed
// below are.
PreservedAnalyses ThreadLocalLoweringInnerPass::run(Function& F, FunctionAnalysisManager& FAM)
{
	const bool inAsmJSSection = F.getSection() == "asmjs";
	// Short-circuit: the rewrite is not attempted on asmjs functions.
	if (inAsmJSSection || !replaceThreadLocalIntrinsicWithFunction(F, GDA))
		return PreservedAnalyses::all();

	PreservedAnalyses preserved;
	preserved.preserve<PointerAnalysis>();
	preserved.preserve<RegisterizeAnalysis>();
	preserved.preserve<GlobalDepsAnalysis>();
	preserved.preserve<DominatorTreeAnalysis>();
	preserved.preserve<InvokeWrappingAnalysis>();
	return preserved;
}

// Module-level driver: wraps ThreadLocalLoweringInnerPass in a function-pass
// adaptor, runs it over M, and returns the preserved-analyses set reported by
// that nested pipeline.
PreservedAnalyses ThreadLocalLoweringPass::run(Module& M, ModuleAnalysisManager& MAM)
{
	// The inner pass needs the analyzer to register the helper it generates.
	GlobalDepsAnalyzer& GDA = MAM.getResult<GlobalDepsAnalysis>(M);

	FunctionPassManager perFunction;
	perFunction.addPass(ThreadLocalLoweringInnerPass(GDA));

	ModulePassManager pipeline;
	pipeline.addPass(createModuleToFunctionPassAdaptor(std::move(perFunction)));
	return pipeline.run(M, MAM);
}

}
Loading
Loading