Skip to content

[SYCL] fix for __sycl_unregister_lib() on Windows and tests #19633

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: sycl
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 51 additions & 2 deletions clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1293,6 +1293,50 @@ class BinaryWrapper {
appendToGlobalDtors(M, Func, /*Priority*/ 1);
}

void createSyclRegisterWithAtexitUnregister(GlobalVariable *BinDesc) {
auto *UnregFuncTy =
FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
auto *UnregFunc =
Function::Create(UnregFuncTy, GlobalValue::InternalLinkage,
"sycl.descriptor_unreg.atexit", &M);
UnregFunc->setSection(".text.startup");

// Declaration for __sycl_unregister_lib(void*).
auto *UnregTargetTy =
FunctionType::get(Type::getVoidTy(C), getPtrTy(), false);
FunctionCallee UnregTargetC =
M.getOrInsertFunction("__sycl_unregister_lib", UnregTargetTy);

IRBuilder<> UnregBuilder(BasicBlock::Create(C, "entry", UnregFunc));
UnregBuilder.CreateCall(UnregTargetC, BinDesc);
UnregBuilder.CreateRetVoid();

auto *RegFuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
auto *RegFunc = Function::Create(RegFuncTy, GlobalValue::InternalLinkage,
"sycl.descriptor_reg", &M);
RegFunc->setSection(".text.startup");

auto *RegTargetTy =
FunctionType::get(Type::getVoidTy(C), getPtrTy(), false);
FunctionCallee RegTargetC =
M.getOrInsertFunction("__sycl_register_lib", RegTargetTy);

// `atexit` takes a `void(*)()` function pointer. In LLVM IR, this is
// typically represented as `i32 (ptr)`.
FunctionType *AtExitTy =
FunctionType::get(Type::getInt32Ty(C), getPtrTy(), false);
FunctionCallee AtExitC = M.getOrInsertFunction("atexit", AtExitTy);

IRBuilder<> RegBuilder(BasicBlock::Create(C, "entry", RegFunc));
RegBuilder.CreateCall(RegTargetC, BinDesc);
RegBuilder.CreateCall(AtExitC, UnregFunc);
RegBuilder.CreateRetVoid();

// Add this function to global destructors.
// Match priority of __tgt_register_lib
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// Match priority of __tgt_register_lib
// Match priority of __sycl_register_lib

appendToGlobalCtors(M, RegFunc, /*Priority*/ 1);
}

public:
BinaryWrapper(StringRef Target, StringRef ToolName,
StringRef SymPropBCFiles = "")
Expand Down Expand Up @@ -1370,8 +1414,13 @@ class BinaryWrapper {

if (EmitRegFuncs) {
GlobalVariable *Desc = *DescOrErr;
createRegisterFunction(Kind, Desc);
createUnregisterFunction(Kind, Desc);
if (Kind == OffloadKind::SYCL &&
Triple(M.getTargetTriple()).isOSWindows()) {
createSyclRegisterWithAtexitUnregister(Desc);
} else {
createRegisterFunction(Kind, Desc);
createUnregisterFunction(Kind, Desc);
}
}
}
return &M;
Expand Down
54 changes: 52 additions & 2 deletions llvm/lib/Frontend/Offloading/SYCLOffloadWrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/PropertySetIO.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <memory>
#include <string>
Expand Down Expand Up @@ -734,6 +735,51 @@ struct Wrapper {
// Add this function to global destructors.
appendToGlobalDtors(M, Func, /*Priority*/ 1);
}

void createSyclRegisterWithAtexitUnregister(GlobalVariable *FatbinDesc) {
auto *UnregFuncTy =
FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
auto *UnregFunc =
Function::Create(UnregFuncTy, GlobalValue::InternalLinkage,
"sycl.descriptor_unreg.atexit", &M);
UnregFunc->setSection(".text.startup");

// Declaration for __sycl_unregister_lib(void*).
auto *UnregTargetTy =
FunctionType::get(Type::getVoidTy(C), PointerType::getUnqual(C), false);
FunctionCallee UnregTargetC =
M.getOrInsertFunction("__sycl_unregister_lib", UnregTargetTy);

// Body of the unregister wrapper.
IRBuilder<> UnregBuilder(BasicBlock::Create(C, "entry", UnregFunc));
UnregBuilder.CreateCall(UnregTargetC, FatbinDesc);
UnregBuilder.CreateRetVoid();

auto *RegFuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
auto *RegFunc = Function::Create(RegFuncTy, GlobalValue::InternalLinkage,
"sycl.descriptor_reg", &M);
RegFunc->setSection(".text.startup");

auto *RegTargetTy =
FunctionType::get(Type::getVoidTy(C), PointerType::getUnqual(C), false);
FunctionCallee RegTargetC =
M.getOrInsertFunction("__sycl_register_lib", RegTargetTy);

// `atexit` takes a `void(*)()` function pointer. In LLVM IR, this is
// typically represented as `i32 (ptr)`.
FunctionType *AtExitTy = FunctionType::get(
Type::getInt32Ty(C), PointerType::getUnqual(C), false);
FunctionCallee AtExitC = M.getOrInsertFunction("atexit", AtExitTy);

IRBuilder<> RegBuilder(BasicBlock::Create(C, "entry", RegFunc));
RegBuilder.CreateCall(RegTargetC, FatbinDesc);
RegBuilder.CreateCall(AtExitC, UnregFunc);
RegBuilder.CreateRetVoid();

// Finally, add to global constructors.
appendToGlobalCtors(M, RegFunc, /*Priority*/ 1);
}

}; // end of Wrapper

} // anonymous namespace
Expand All @@ -747,7 +793,11 @@ Error llvm::offloading::wrapSYCLBinaries(llvm::Module &M,
return createStringError(inconvertibleErrorCode(),
"No binary descriptors created.");

W.createRegisterFatbinFunction(Desc);
W.createUnregisterFunction(Desc);
if (Triple(M.getTargetTriple()).isOSWindows()) {
W.createSyclRegisterWithAtexitUnregister(Desc);
} else {
W.createRegisterFatbinFunction(Desc);
W.createUnregisterFunction(Desc);
}
return Error::success();
}
19 changes: 0 additions & 19 deletions sycl/source/detail/device_global_map.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,25 +73,6 @@ class DeviceGlobalMap {
}
}

void eraseEntries(const RTDeviceBinaryImage *Img) {
const auto &DeviceGlobals = Img->getDeviceGlobals();
std::lock_guard<std::mutex> DeviceGlobalsGuard(MDeviceGlobalsMutex);
for (const sycl_device_binary_property &DeviceGlobal : DeviceGlobals) {
if (auto DevGlobalIt = MDeviceGlobals.find(DeviceGlobal->Name);
DevGlobalIt != MDeviceGlobals.end()) {
auto findDevGlobalByValue = std::find_if(
MPtr2DeviceGlobal.begin(), MPtr2DeviceGlobal.end(),
[&DevGlobalIt](
const std::pair<const void *, DeviceGlobalMapEntry *> &Entry) {
return Entry.second == DevGlobalIt->second.get();
});
if (findDevGlobalByValue != MPtr2DeviceGlobal.end())
MPtr2DeviceGlobal.erase(findDevGlobalByValue);
MDeviceGlobals.erase(DevGlobalIt);
}
}
}

void addOrInitialize(const void *DeviceGlobalPtr, const char *UniqueId) {
std::lock_guard<std::mutex> DeviceGlobalsGuard(MDeviceGlobalsMutex);
auto ExistingDeviceGlobal = MDeviceGlobals.find(UniqueId);
Expand Down
7 changes: 0 additions & 7 deletions sycl/source/detail/program_manager/program_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2180,8 +2180,6 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) {
m_VFSet2BinImage.erase(SetName);
}

m_DeviceGlobals.eraseEntries(Img);

{
std::lock_guard<std::mutex> HostPipesGuard(m_HostPipesMutex);
auto HostPipes = Img->getHostPipes();
Expand Down Expand Up @@ -3824,10 +3822,5 @@ extern "C" void __sycl_register_lib(sycl_device_binaries desc) {

// Executed as a part of current module's (.exe, .dll) static initialization
extern "C" void __sycl_unregister_lib(sycl_device_binaries desc) {
// Partial cleanup is not necessary at shutdown
#ifndef _WIN32
if (!sycl::detail::GlobalHandler::instance().isOkToDefer())
return;
sycl::detail::ProgramManager::getInstance().removeImages(desc);
#endif
}
3 changes: 3 additions & 0 deletions sycl/test-e2e/Basic/stream/zero_buffer_size.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
// UNSUPPORTED: hip
// UNSUPPORTED-TRACKER: CMPLRLLVM-69478

// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

Expand Down
25 changes: 25 additions & 0 deletions sycl/test-e2e/IntermediateLib/Inputs/incrementing_lib.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#include <sycl/detail/core.hpp>

#if defined(_WIN32)
#define API_EXPORT __declspec(dllexport)
#else
#define API_EXPORT
#endif

#ifndef INC
#define INC 1
#endif

#ifndef CLASSNAME
#define CLASSNAME same
#endif

extern "C" API_EXPORT void performIncrementation(sycl::queue &q,
sycl::buffer<int, 1> &buf) {
sycl::range<1> r = buf.get_range();
q.submit([&](sycl::handler &cgh) {
auto acc = buf.get_access<sycl::access::mode::write>(cgh);
cgh.parallel_for<class CLASSNAME>(
r, [=](sycl::id<1> idx) { acc[idx] += INC; });
});
}
142 changes: 142 additions & 0 deletions sycl/test-e2e/IntermediateLib/multi_lib_app.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
// UNSUPPORTED: cuda || hip
// UNSUPPORTED-TRACKER: CMPLRLLVM-69415

// REQUIRES: level_zero
Comment on lines +1 to +4
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we remove "REQUIRES: level_zero"? It looks like test is supposed to work on OpenCL backend as well.
If you want to additionally check leaks on level_zero backend, then probably something like this should be done:
// RUN: %if level_zero %{%{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}


// DEFINE: %{fPIC_flag} = %if windows %{%} %else %{-fPIC%}
// DEFINE: %{shared_lib_ext} = %if windows %{dll%} %else %{so%}

// clang-format off
// IMPORTANT -DSO_PATH='R"(%T)"' WTF ??
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// IMPORTANT -DSO_PATH='R"(%T)"' WTF ??
// IMPORTANT -DSO_PATH='R"(%T)"'

// We need to capture %T, the build directory, in a string
// and the normal STRINGIFY() macros hack won't work.
// Because on Windows, the path delimiters are \,
// which C++ preprocessor converts to escape sequences,
// which becomes a nightmare.
// SO the hack here is to put heredoc in the definition
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// SO the hack here is to put heredoc in the definition
// So the hack here is to put heredoc in the definition

// and use single quotes, which Python forgivingly accepts.
// clang-format on

// RUN: %{build} %{fPIC_flag} -DSO_PATH='R"(%T)"' -o %t.out

// RUN: %clangxx -fsycl %{fPIC_flag} -shared -DINC=1 -o %T/lib_a.%{shared_lib_ext} %S/Inputs/incrementing_lib.cpp
// RUN: %clangxx -fsycl %{fPIC_flag} -shared -DINC=2 -o %T/lib_b.%{shared_lib_ext} %S/Inputs/incrementing_lib.cpp
// RUN: %clangxx -fsycl %{fPIC_flag} -shared -DINC=4 -o %T/lib_c.%{shared_lib_ext} %S/Inputs/incrementing_lib.cpp

// RUN: env UR_L0_LEAKS_DEBUG=1 %{run} %t.out

// This test uses a kernel of the same name in three different shared libraries.
// It loads each library, calls the kernel, and checks that the incrementation
// is done correctly, and then unloads the library.
// This test ensures that __sycl_register_lib() and __sycl_unregister_lib()
// are called correctly, and that the device images are cleaned up properly.

#include <sycl/detail/core.hpp>

using namespace sycl::ext::oneapi::experimental;


#ifdef _WIN32
#include <windows.h>

void *loadOsLibrary(const std::string &LibraryPath) {
HMODULE h =
LoadLibraryExA(LibraryPath.c_str(), NULL, LOAD_WITH_ALTERED_SEARCH_PATH);
return (void *)h;
}
int unloadOsLibrary(void *Library) {
return FreeLibrary((HMODULE)Library) ? 0 : 1;
}
void *getOsLibraryFuncAddress(void *Library, const std::string &FunctionName) {
return (void *)GetProcAddress((HMODULE)Library, FunctionName.c_str());
}

#else
#include <dlfcn.h>

void *loadOsLibrary(const std::string &LibraryPath) {
void *so = dlopen(LibraryPath.c_str(), RTLD_NOW);
if (!so) {
char *Error = dlerror();
std::cerr << "dlopen(" << LibraryPath << ") failed with <"
<< (Error ? Error : "unknown error") << ">" << std::endl;
}
return so;
}

int unloadOsLibrary(void *Library) { return dlclose(Library); }

void *getOsLibraryFuncAddress(void *Library, const std::string &FunctionName) {
return dlsym(Library, FunctionName.c_str());
}
#endif

// Define the function pointer type for performIncrementation
using IncFuncT = void(sycl::queue &, sycl::buffer<int, 1> &);

void initializeBuffer(sycl::buffer<int, 1> &buf) {
auto acc = sycl::host_accessor<int, 1>(buf);
for (size_t i = 0; i < buf.size(); ++i)
acc[i] = 0;
}

void checkIncrementation(sycl::buffer<int, 1> &buf, int val) {
auto acc = sycl::host_accessor<int, 1>(buf);
for (size_t i = 0; i < buf.size(); ++i) {
std::cout << acc[i] << " ";
assert(acc[i] == val);
}
std::cout << std::endl;
}

int main() {
sycl::queue q;

sycl::range<1> r(8);
sycl::buffer<int, 1> buf(r);
initializeBuffer(buf);

std::string base_path = SO_PATH;

#ifdef _WIN32
std::string path_to_lib_a = base_path + "\\lib_a.dll";
std::string path_to_lib_b = base_path + "\\lib_b.dll";
std::string path_to_lib_c = base_path + "\\lib_c.dll";
#else
std::string path_to_lib_a = base_path + "/lib_a.so";
std::string path_to_lib_b = base_path + "/lib_b.so";
std::string path_to_lib_c = base_path + "/lib_c.so";
#endif

std::cout << "paths: " << path_to_lib_a << std::endl;
std::cout << "SO_PATH: " << SO_PATH << std::endl;

void *lib_a = loadOsLibrary(path_to_lib_a);
void *f = getOsLibraryFuncAddress(lib_a, "performIncrementation");
auto performIncrementationFuncA = reinterpret_cast<IncFuncT *>(f);
performIncrementationFuncA(q, buf); // call the function from lib_a
q.wait();
checkIncrementation(buf, 1);
unloadOsLibrary(lib_a);
std::cout << "lib_a done" << std::endl;

void *lib_b = loadOsLibrary(path_to_lib_b);
f = getOsLibraryFuncAddress(lib_b, "performIncrementation");
auto performIncrementationFuncB = reinterpret_cast<IncFuncT *>(f);
performIncrementationFuncB(q, buf); // call the function from lib_b
q.wait();
checkIncrementation(buf, 1 + 2);
unloadOsLibrary(lib_b);
std::cout << "lib_b done" << std::endl;

void *lib_c = loadOsLibrary(path_to_lib_c);
f = getOsLibraryFuncAddress(lib_c, "performIncrementation");
auto performIncrementationFuncC = reinterpret_cast<IncFuncT *>(f);
q.wait();
performIncrementationFuncC(q, buf); // call the function from lib_c
checkIncrementation(buf, 1 + 2 + 4);
unloadOsLibrary(lib_c);
std::cout << "lib_c done" << std::endl;

return 0;
}
10 changes: 0 additions & 10 deletions sycl/unittests/program_manager/Cleanup.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -303,16 +303,6 @@ void checkAllInvolvedContainers(ProgramManagerExposed &PM, size_t ExpectedCount,
}
EXPECT_EQ(PM.getKernelImplicitLocalArgPos().size(), ExpectedCount) << Comment;

{
sycl::detail::DeviceGlobalMap &DeviceGlobalMap = PM.getDeviceGlobals();
EXPECT_EQ(DeviceGlobalMap.size(), ExpectedCount) << Comment;
EXPECT_TRUE(DeviceGlobalMap.count(generateRefName("A", "DeviceGlobal")) > 0)
<< Comment;
EXPECT_TRUE(DeviceGlobalMap.count(generateRefName("B", "DeviceGlobal")) > 0)
<< Comment;
EXPECT_EQ(DeviceGlobalMap.getPointerMap().size(), ExpectedCount) << Comment;
}

Comment on lines -306 to -315
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to add a test for new behavior to compensate the removed test?

{
EXPECT_EQ(PM.getHostPipes().size(), ExpectedCount) << Comment;
EXPECT_TRUE(PM.getHostPipes().count(generateRefName("A", "HostPipe")) > 0)
Expand Down