From c5b38b752b2bdfffce602635d52e61a12c76aab2 Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Tue, 2 Sep 2025 15:17:21 -0700 Subject: [PATCH] [UR] Fix usm pools creation failure at context initialization (#19921) Context creation was failing when sub-sub-devices were exposed, because the Level Zero adapter attempted to add multiple USM pools with identical descriptors. This occurred since, for the L0 backend, sub-sub-devices and their parent sub-devices share the same Level Zero device handle and should use the same USM pool. This PR resolves the issue by ensuring only devices with unique Level Zero handles are collected for USM pool creation, preventing duplicate pools. Additionally, this PR fixes an issue with an uninitialized `ur_device_partition_property_t` variable passed to UR. Previously, this could result in an unexpected value for `value.affinity_domain` when the property type was `UR_DEVICE_PARTITION_BY_CSLICE` resulting on error from adapter. --- sycl/source/detail/device_impl.cpp | 5 +-- .../context_create_sub_sub_device.cpp | 42 +++++++++++++++++++ .../source/adapters/level_zero/device.hpp | 14 ++++--- .../source/adapters/level_zero/usm.cpp | 3 +- .../source/adapters/level_zero/v2/usm.cpp | 2 +- 5 files changed, 56 insertions(+), 10 deletions(-) create mode 100644 sycl/test-e2e/Adapters/level_zero/context_create_sub_sub_device.cpp diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index da497d8a97783..f89b6d7e133e3 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -258,7 +258,7 @@ std::vector device_impl::create_sub_devices( affinityDomainToString(AffinityDomain) + "."); } - ur_device_partition_property_t Prop; + ur_device_partition_property_t Prop{}; Prop.type = UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN; Prop.value.affinity_domain = static_cast(AffinityDomain); @@ -285,9 +285,8 @@ std::vector device_impl::create_sub_devices() const { "sycl::info::partition_property::ext_intel_partition_by_cslice."); } - ur_device_partition_property_t Prop; + ur_device_partition_property_t Prop{}; Prop.type = UR_DEVICE_PARTITION_BY_CSLICE; - ur_device_partition_properties_t Properties{}; Properties.stype = UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES; Properties.pProperties = &Prop; diff --git a/sycl/test-e2e/Adapters/level_zero/context_create_sub_sub_device.cpp b/sycl/test-e2e/Adapters/level_zero/context_create_sub_sub_device.cpp new file mode 100644 index 0000000000000..3c2c0523c39c3 --- /dev/null +++ b/sycl/test-e2e/Adapters/level_zero/context_create_sub_sub_device.cpp @@ -0,0 +1,42 @@ +// REQUIRES: arch-intel_gpu_pvc, level_zero +// UNSUPPORTED: gpu-intel-pvc-1T +// UNSUPPORTED-TRACKER: GSD-9121 + +// DEFINE: %{setup_env} = env ZE_FLAT_DEVICE_HIERARCHY=COMPOSITE ZE_AFFINITY_MASK=0 ZEX_NUMBER_OF_CCS=0:4 +// RUN: %{build} -o %t.out +// RUN: %{setup_env} %{run} %t.out + +// Check that context can be created successfully when sub-sub-devices are +// exposed. +#include +#include +#include + +using namespace sycl; + +int main() { + std::cout << "[info] start context_create_sub_sub_device test" << std::endl; + device d; + std::vector subsubdevices; + + auto subdevices = d.create_sub_devices< + info::partition_property::partition_by_affinity_domain>( + info::partition_affinity_domain::next_partitionable); + std::cout << "[info] sub device size = " << subdevices.size() << std::endl; + + for (auto &subdev : subdevices) { + subsubdevices = subdev.create_sub_devices< + info::partition_property::ext_intel_partition_by_cslice>(); + + std::cout << "[info] sub-sub device size = " << subsubdevices.size() + << std::endl; + } + + // Create contexts + context ctx1(d); + context ctx2(subdevices); + context ctx3(subsubdevices); + + std::cout << "[info] contexts created successfully" << std::endl; + return 0; +} diff --git a/unified-runtime/source/adapters/level_zero/device.hpp b/unified-runtime/source/adapters/level_zero/device.hpp index 84f81ac7c6b90..05e65fe3170e5 100644 --- a/unified-runtime/source/adapters/level_zero/device.hpp +++ b/unified-runtime/source/adapters/level_zero/device.hpp @@ -248,16 +248,20 @@ struct ur_device_handle_t_ : ur_object { ur::RefCount RefCount; }; -inline std::vector -CollectDevicesAndSubDevices(const std::vector &Devices) { +// Collects a flat vector of unique devices for USM memory pool creation. +// Traverses the input devices and their sub-devices, ensuring each Level Zero +// device handle appears only once in the result. +inline std::vector CollectDevicesForUsmPoolCreation( + const std::vector &Devices) { std::vector DevicesAndSubDevices; - std::unordered_set Seen; + std::unordered_set Seen; + std::function &)> CollectDevicesAndSubDevicesRec = [&](const std::vector &Devices) { for (auto &Device : Devices) { - // Only add device if has not been seen before. - if (Seen.insert(Device).second) { + // Only add device if ZeDevice has not been seen before. + if (Seen.insert(Device->ZeDevice).second) { DevicesAndSubDevices.push_back(Device); CollectDevicesAndSubDevicesRec(Device->SubDevices); } diff --git a/unified-runtime/source/adapters/level_zero/usm.cpp b/unified-runtime/source/adapters/level_zero/usm.cpp index ca2b462067eaa..0141a04140c13 100644 --- a/unified-runtime/source/adapters/level_zero/usm.cpp +++ b/unified-runtime/source/adapters/level_zero/usm.cpp @@ -1061,7 +1061,8 @@ ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context, } } - auto DevicesAndSubDevices = CollectDevicesAndSubDevices(Context->Devices); + auto DevicesAndSubDevices = + CollectDevicesForUsmPoolCreation(Context->Devices); auto Descriptors = usm::pool_descriptor::createFromDevices( this, Context, DevicesAndSubDevices); for (auto &Desc : Descriptors) { diff --git a/unified-runtime/source/adapters/level_zero/v2/usm.cpp b/unified-runtime/source/adapters/level_zero/v2/usm.cpp index 0d49a8ad0a845..080ab75afb0bb 100644 --- a/unified-runtime/source/adapters/level_zero/v2/usm.cpp +++ b/unified-runtime/source/adapters/level_zero/v2/usm.cpp @@ -168,7 +168,7 @@ ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t hContext, } auto devicesAndSubDevices = - CollectDevicesAndSubDevices(hContext->getDevices()); + CollectDevicesForUsmPoolCreation(hContext->getDevices()); auto descriptors = usm::pool_descriptor::createFromDevices( this, hContext, devicesAndSubDevices); for (auto &desc : descriptors) {