Skip to content

Commit cbb42dd

Browse files
authored
fix: Fix SEGFAULT in BLS Model Loading (#420)
* fix: Fix SEGFAULT in BLS Model Loading This change contains the minimal change to avoid SEGFAULT failures during the BLS Model Loading test. The crash itself is caused by deleting a shared-memory region's control allocation, which can happen when we somehow end up with handle{1} (the control region) in our accounting and then delete it when its refcount reaches zero. This change does not fix the root cause of how we're accounting for handle{1} (which we should never have). * rename variable
1 parent 4dae720 commit cbb42dd

File tree

1 file changed

+11
-0
lines changed

1 file changed

+11
-0
lines changed

src/shm_manager.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@
4343
namespace triton { namespace backend { namespace python {
4444
namespace bi = boost::interprocess;
4545

46+
// Handle value (1) that identifies the shared-memory control region.
// SharedMemoryManager::Deallocate and DeallocateUnsafe compare against it and
// return early so the control region is never deallocated.
static constexpr bi::managed_external_buffer::handle_t kShmControlRegionHandle{
47+
1};
48+
4649
class CUDAMemoryPoolManager {
4750
public:
4851
CUDAMemoryPoolManager() : triton_memory_manager_(nullptr) {}
@@ -166,6 +169,10 @@ class SharedMemoryManager {
166169

167170
void Deallocate(bi::managed_external_buffer::handle_t handle)
168171
{
172+
// Do not delete the control region, to avoid undefined behavior.
173+
if (handle == kShmControlRegionHandle) {
174+
return;
175+
}
169176
bi::scoped_lock<bi::interprocess_mutex> guard{*shm_mutex_};
170177
GrowIfNeeded(0);
171178
void* ptr = managed_buffer_->get_address_from_handle(handle);
@@ -174,6 +181,10 @@ class SharedMemoryManager {
174181

175182
// Releases the allocation that `handle` refers to inside managed_buffer_.
// Unlike Deallocate, this variant does not acquire shm_mutex_ and does not
// call GrowIfNeeded; presumably the caller is expected to already hold the
// lock -- NOTE(review): confirm against call sites.
void DeallocateUnsafe(bi::managed_external_buffer::handle_t handle)
176183
{
184+
// Never free the control region (kShmControlRegionHandle): deallocating it
// leads to undefined behavior, so such requests are silently ignored.
185+
if (handle == kShmControlRegionHandle) {
186+
return;
187+
}
177188
// Translate the handle back into an address in this process's mapping.
void* ptr = managed_buffer_->get_address_from_handle(handle);
178189
managed_buffer_->deallocate(ptr);
179190
}

0 commit comments

Comments
 (0)