Skip to content

Commit 61de220

Browse files
[SYCL] fix for __sycl_unregister_lib() on Windows and tests (#19633)
`__sycl_unregister_lib()` is not being called on Windows when using shared libraries due to a limitation in LLVM/clang To work around this, on Windows we register both `__sycl_register_lib()` and an `std::atexit` handler that will call `__sycl_unregister_lib()` . Further, it was discovered that freeing of the device globals during device images destruction is duplicate and unnecessary. The `~context_impl` destructor handles that (and handles it correctly, because a context is needed to free USM memory). So we remove the unneeded duplication. Adding a test that stresses `__sycl_unregister_lib()` and makes sure there are no resource leaks --------- Signed-off-by: Chris Perkins <[email protected]>
1 parent ed3767c commit 61de220

File tree

10 files changed

+324
-20
lines changed

10 files changed

+324
-20
lines changed

clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1293,6 +1293,48 @@ class BinaryWrapper {
12931293
appendToGlobalDtors(M, Func, /*Priority*/ 1);
12941294
}
12951295

1296+
void createSyclRegisterWithAtexitUnregister(GlobalVariable *BinDesc) {
1297+
auto *UnregFuncTy =
1298+
FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
1299+
auto *UnregFunc =
1300+
Function::Create(UnregFuncTy, GlobalValue::InternalLinkage,
1301+
"sycl.descriptor_unreg.atexit", &M);
1302+
UnregFunc->setSection(".text.startup");
1303+
1304+
// Declaration for __sycl_unregister_lib(void*).
1305+
auto *UnregTargetTy =
1306+
FunctionType::get(Type::getVoidTy(C), getPtrTy(), /*isVarArg=*/false);
1307+
FunctionCallee UnregTargetC =
1308+
M.getOrInsertFunction("__sycl_unregister_lib", UnregTargetTy);
1309+
1310+
IRBuilder<> UnregBuilder(BasicBlock::Create(C, "entry", UnregFunc));
1311+
UnregBuilder.CreateCall(UnregTargetC, BinDesc);
1312+
UnregBuilder.CreateRetVoid();
1313+
1314+
auto *RegFuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
1315+
auto *RegFunc = Function::Create(RegFuncTy, GlobalValue::InternalLinkage,
1316+
"sycl.descriptor_reg", &M);
1317+
RegFunc->setSection(".text.startup");
1318+
1319+
auto *RegTargetTy =
1320+
FunctionType::get(Type::getVoidTy(C), getPtrTy(), false);
1321+
FunctionCallee RegTargetC =
1322+
M.getOrInsertFunction("__sycl_register_lib", RegTargetTy);
1323+
1324+
// `atexit` takes a `void(*)()` function pointer arg and returns an i32.
1325+
FunctionType *AtExitTy =
1326+
FunctionType::get(Type::getInt32Ty(C), getPtrTy(), false);
1327+
FunctionCallee AtExitC = M.getOrInsertFunction("atexit", AtExitTy);
1328+
1329+
IRBuilder<> RegBuilder(BasicBlock::Create(C, "entry", RegFunc));
1330+
RegBuilder.CreateCall(RegTargetC, BinDesc);
1331+
RegBuilder.CreateCall(AtExitC, UnregFunc);
1332+
RegBuilder.CreateRetVoid();
1333+
1334+
// Add this function to global destructors.
1335+
appendToGlobalCtors(M, RegFunc, /*Priority*/ 1);
1336+
}
1337+
12961338
public:
12971339
BinaryWrapper(StringRef Target, StringRef ToolName,
12981340
StringRef SymPropBCFiles = "")
@@ -1370,8 +1412,13 @@ class BinaryWrapper {
13701412

13711413
if (EmitRegFuncs) {
13721414
GlobalVariable *Desc = *DescOrErr;
1373-
createRegisterFunction(Kind, Desc);
1374-
createUnregisterFunction(Kind, Desc);
1415+
if (Kind == OffloadKind::SYCL &&
1416+
Triple(M.getTargetTriple()).isOSWindows()) {
1417+
createSyclRegisterWithAtexitUnregister(Desc);
1418+
} else {
1419+
createRegisterFunction(Kind, Desc);
1420+
createUnregisterFunction(Kind, Desc);
1421+
}
13751422
}
13761423
}
13771424
return &M;

llvm/lib/Frontend/Offloading/SYCLOffloadWrapper.cpp

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#include "llvm/Support/ErrorHandling.h"
3535
#include "llvm/Support/LineIterator.h"
3636
#include "llvm/Support/PropertySetIO.h"
37+
#include "llvm/TargetParser/Triple.h"
3738
#include "llvm/Transforms/Utils/ModuleUtils.h"
3839
#include <memory>
3940
#include <string>
@@ -734,6 +735,50 @@ struct Wrapper {
734735
// Add this function to global destructors.
735736
appendToGlobalDtors(M, Func, /*Priority*/ 1);
736737
}
738+
739+
void createSyclRegisterWithAtexitUnregister(GlobalVariable *FatbinDesc) {
740+
auto *UnregFuncTy =
741+
FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
742+
auto *UnregFunc =
743+
Function::Create(UnregFuncTy, GlobalValue::InternalLinkage,
744+
"sycl.descriptor_unreg.atexit", &M);
745+
UnregFunc->setSection(".text.startup");
746+
747+
// Declaration for __sycl_unregister_lib(void*).
748+
auto *UnregTargetTy =
749+
FunctionType::get(Type::getVoidTy(C), PointerType::getUnqual(C), false);
750+
FunctionCallee UnregTargetC =
751+
M.getOrInsertFunction("__sycl_unregister_lib", UnregTargetTy);
752+
753+
// Body of the unregister wrapper.
754+
IRBuilder<> UnregBuilder(BasicBlock::Create(C, "entry", UnregFunc));
755+
UnregBuilder.CreateCall(UnregTargetC, FatbinDesc);
756+
UnregBuilder.CreateRetVoid();
757+
758+
auto *RegFuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
759+
auto *RegFunc = Function::Create(RegFuncTy, GlobalValue::InternalLinkage,
760+
"sycl.descriptor_reg", &M);
761+
RegFunc->setSection(".text.startup");
762+
763+
auto *RegTargetTy =
764+
FunctionType::get(Type::getVoidTy(C), PointerType::getUnqual(C), false);
765+
FunctionCallee RegTargetC =
766+
M.getOrInsertFunction("__sycl_register_lib", RegTargetTy);
767+
768+
// `atexit` takes a `void(*)()` function pointer arg and returns an i32.
769+
FunctionType *AtExitTy = FunctionType::get(
770+
Type::getInt32Ty(C), PointerType::getUnqual(C), false);
771+
FunctionCallee AtExitC = M.getOrInsertFunction("atexit", AtExitTy);
772+
773+
IRBuilder<> RegBuilder(BasicBlock::Create(C, "entry", RegFunc));
774+
RegBuilder.CreateCall(RegTargetC, FatbinDesc);
775+
RegBuilder.CreateCall(AtExitC, UnregFunc);
776+
RegBuilder.CreateRetVoid();
777+
778+
// Finally, add to global constructors.
779+
appendToGlobalCtors(M, RegFunc, /*Priority*/ 1);
780+
}
781+
737782
}; // end of Wrapper
738783

739784
} // anonymous namespace
@@ -747,7 +792,11 @@ Error llvm::offloading::wrapSYCLBinaries(llvm::Module &M,
747792
return createStringError(inconvertibleErrorCode(),
748793
"No binary descriptors created.");
749794

750-
W.createRegisterFatbinFunction(Desc);
751-
W.createUnregisterFunction(Desc);
795+
if (Triple(M.getTargetTriple()).isOSWindows()) {
796+
W.createSyclRegisterWithAtexitUnregister(Desc);
797+
} else {
798+
W.createRegisterFatbinFunction(Desc);
799+
W.createUnregisterFunction(Desc);
800+
}
752801
return Error::success();
753802
}

sycl/source/detail/context_impl.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,8 @@ context_impl::~context_impl() {
125125
DeviceGlobalMapEntry *DGEntry =
126126
detail::ProgramManager::getInstance().getDeviceGlobalEntry(
127127
DeviceGlobal);
128-
DGEntry->removeAssociatedResources(this);
128+
if (DGEntry != nullptr)
129+
DGEntry->removeAssociatedResources(this);
129130
}
130131
MCachedLibPrograms.clear();
131132
// TODO catch an exception and put it to list of asynchronous exceptions

sycl/source/detail/device_global_map.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ class DeviceGlobalMap {
9494
});
9595
if (findDevGlobalByValue != MPtr2DeviceGlobal.end())
9696
MPtr2DeviceGlobal.erase(findDevGlobalByValue);
97+
9798
MDeviceGlobals.erase(DevGlobalIt);
9899
}
99100
}
@@ -119,8 +120,7 @@ class DeviceGlobalMap {
119120
DeviceGlobalMapEntry *getEntry(const void *DeviceGlobalPtr) {
120121
std::lock_guard<std::mutex> DeviceGlobalsGuard(MDeviceGlobalsMutex);
121122
auto Entry = MPtr2DeviceGlobal.find(DeviceGlobalPtr);
122-
assert(Entry != MPtr2DeviceGlobal.end() && "Device global entry not found");
123-
return Entry->second;
123+
return (Entry != MPtr2DeviceGlobal.end()) ? Entry->second : nullptr;
124124
}
125125

126126
DeviceGlobalMapEntry *

sycl/source/detail/device_global_map_entry.cpp

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,18 @@ DeviceGlobalUSMMem::~DeviceGlobalUSMMem() {
2121
// removeAssociatedResources is expected to have cleaned up both the pointer
2222
// and the event. When asserts are enabled the values are set, so we check
2323
// these here.
24+
auto ContextImplPtr = MAllocatingContext.lock();
25+
if (ContextImplPtr) {
26+
if (MPtr != nullptr) {
27+
detail::usm::freeInternal(MPtr, ContextImplPtr.get());
28+
MPtr = nullptr;
29+
}
30+
if (MInitEvent != nullptr) {
31+
ContextImplPtr->getAdapter().call<UrApiKind::urEventRelease>(MInitEvent);
32+
MInitEvent = nullptr;
33+
}
34+
}
35+
2436
assert(MPtr == nullptr && "MPtr has not been cleaned up.");
2537
assert(MInitEvent == nullptr && "MInitEvent has not been cleaned up.");
2638
}
@@ -63,6 +75,7 @@ DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM(queue_impl &QueueImpl) {
6375
assert(NewAllocIt.second &&
6476
"USM allocation for device and context already happened.");
6577
DeviceGlobalUSMMem &NewAlloc = NewAllocIt.first->second;
78+
NewAlloc.MAllocatingContext = CtxImpl.shared_from_this();
6679

6780
// Initialize here and save the event.
6881
{
@@ -120,6 +133,7 @@ DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM(const context &Context) {
120133
assert(NewAllocIt.second &&
121134
"USM allocation for device and context already happened.");
122135
DeviceGlobalUSMMem &NewAlloc = NewAllocIt.first->second;
136+
NewAlloc.MAllocatingContext = CtxImpl.shared_from_this();
123137

124138
if (MDeviceGlobalPtr) {
125139
// C++ guarantees members appear in memory in the order they are declared,
@@ -161,12 +175,9 @@ void DeviceGlobalMapEntry::removeAssociatedResources(
161175
if (USMMem.MInitEvent != nullptr)
162176
CtxImpl->getAdapter().call<UrApiKind::urEventRelease>(
163177
USMMem.MInitEvent);
164-
#ifndef NDEBUG
165-
// For debugging we set the event and memory to some recognizable values
166-
// to allow us to check that this cleanup happens before erasure.
178+
// Set to nullptr to avoid double free.
167179
USMMem.MPtr = nullptr;
168180
USMMem.MInitEvent = nullptr;
169-
#endif
170181
MDeviceToUSMPtrMap.erase(USMPtrIt);
171182
}
172183
}
@@ -185,12 +196,9 @@ void DeviceGlobalMapEntry::cleanup() {
185196
detail::usm::freeInternal(USMMem.MPtr, CtxImpl);
186197
if (USMMem.MInitEvent != nullptr)
187198
CtxImpl->getAdapter().call<UrApiKind::urEventRelease>(USMMem.MInitEvent);
188-
#ifndef NDEBUG
189-
// For debugging we set the event and memory to some recognizable values
190-
// to allow us to check that this cleanup happens before erasure.
199+
// Set to nullptr to avoid double free.
191200
USMMem.MPtr = nullptr;
192201
USMMem.MInitEvent = nullptr;
193-
#endif
194202
}
195203
MDeviceToUSMPtrMap.clear();
196204
}

sycl/source/detail/device_global_map_entry.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ struct DeviceGlobalUSMMem {
4646
std::mutex MInitEventMutex;
4747
ur_event_handle_t MInitEvent = nullptr;
4848

49+
std::weak_ptr<context_impl> MAllocatingContext;
4950
friend struct DeviceGlobalMapEntry;
5051
};
5152

sycl/source/detail/program_manager/program_manager.cpp

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3893,10 +3893,5 @@ extern "C" void __sycl_register_lib(sycl_device_binaries desc) {
38933893

38943894
// Executed as a part of current module's (.exe, .dll) static initialization
38953895
extern "C" void __sycl_unregister_lib(sycl_device_binaries desc) {
3896-
// Partial cleanup is not necessary at shutdown
3897-
#ifndef _WIN32
3898-
if (!sycl::detail::GlobalHandler::instance().isOkToDefer())
3899-
return;
39003896
sycl::detail::ProgramManager::getInstance().removeImages(desc);
3901-
#endif
39023897
}

sycl/test-e2e/Basic/stream/zero_buffer_size.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
// UNSUPPORTED: hip
2+
// UNSUPPORTED-TRACKER: CMPLRLLVM-69478
3+
14
// RUN: %{build} -o %t.out
25
// RUN: %{run} %t.out
36

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
#include <sycl/detail/core.hpp>
2+
3+
#if defined(_WIN32)
4+
#define API_EXPORT __declspec(dllexport)
5+
#else
6+
#define API_EXPORT
7+
#endif
8+
9+
#ifndef INC
10+
#define INC 1
11+
#endif
12+
13+
#ifndef CLASSNAME
14+
#define CLASSNAME same
15+
#endif
16+
17+
#ifdef WITH_DEVICE_GLOBALS
18+
// Using device globals within the shared libraries only
19+
// works if the names do not collide. Note that we cannot
20+
// load a library multiple times if it has a device global.
21+
#define CONCAT_HELPER(a, b) a##b
22+
#define CONCAT(a, b) CONCAT_HELPER(a, b)
23+
24+
using SomeProperties = decltype(sycl::ext::oneapi::experimental::properties{});
25+
sycl::ext::oneapi::experimental::device_global<int, SomeProperties>
26+
CONCAT(DGVar, CLASSNAME) __attribute__((visibility("default")));
27+
28+
#endif // WITH_DEVICE_GLOBALS
29+
30+
extern "C" API_EXPORT void performIncrementation(sycl::queue &q,
31+
sycl::buffer<int, 1> &buf) {
32+
sycl::range<1> r = buf.get_range();
33+
q.submit([&](sycl::handler &cgh) {
34+
auto acc = buf.get_access<sycl::access::mode::write>(cgh);
35+
cgh.parallel_for<class CLASSNAME>(
36+
r, [=](sycl::id<1> idx) { acc[idx] += INC; });
37+
});
38+
}

0 commit comments

Comments
 (0)