Skip to content

Commit 0da83a4

Browse files
committed
[UR][L0] Refine default device logic in urDeviceGet
1 parent 25fbd1f commit 0da83a4

File tree

1 file changed

+137
-109
lines changed
  • unified-runtime/source/adapters/level_zero

1 file changed

+137
-109
lines changed

unified-runtime/source/adapters/level_zero/device.cpp

Lines changed: 137 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -68,115 +68,6 @@ getRangeOfAllowedCopyEngines(const ur_device_handle_t &Device) {
6868

6969
namespace ur::level_zero {
7070

71-
/// Enumerates the root devices of a platform that match a device type.
///
/// Sub-devices stored in the platform's device cache are never reported, and
/// composite devices are skipped when the combined device hierarchy is
/// detected. A request for UR_DEVICE_TYPE_DEFAULT stops at the first GPU that
/// passes those filters.
ur_result_t urDeviceGet(
    /// [in] handle of the platform instance
    ur_platform_handle_t Platform,
    /// [in] the type of the devices.
    ur_device_type_t DeviceType,
    /// [in] the number of devices to be added to phDevices. If phDevices is not
    /// NULL then NumEntries should be greater than zero, otherwise
    /// ::UR_RESULT_ERROR_INVALID_SIZE, will be returned.
    uint32_t NumEntries,
    /// [out][optional][range(0, NumEntries)] array of handle of devices. If
    /// NumEntries is less than the number of devices available, then
    /// platform shall only retrieve that number of devices.
    ur_device_handle_t *Devices,
    /// [out][optional] pointer to the number of devices. pNumDevices will be
    /// updated with the total number of devices available.
    uint32_t *NumDevices) {

  const auto CacheStatus = Platform->populateDeviceCacheIfNeeded();
  if (CacheStatus != UR_RESULT_SUCCESS)
    return CacheStatus;

  // Devices that pass the type filter end up here.
  std::vector<ur_device_handle_t> Selected;
  std::shared_lock<ur_shared_mutex> CacheGuard(Platform->URDevicesCacheMutex);

  // Composite devices must be hidden when ZE_FLAT_DEVICE_HIERARCHY=COMBINED.
  // The mode is inferred from what zeDeviceGetRootDevice reports for the
  // root-devices in the cache:
  //  - COMPOSITE: L0 returns cards as devices. Sub-devices are skipped before
  //    the query, so only root-devices are asked and they have nothing above
  //    them in the hierarchy; the call must yield nullptr.
  //  - FLAT: per the L0 spec the call always yields nullptr.
  //  - COMBINED: L0 returns tiles as devices and the call yields the card
  //    that contains the tile.
  const auto HasRootAbove = [](const auto &D) {
    if (D->isSubDevice())
      return false;
    ze_device_handle_t RootDev = nullptr;
    // ZE2UR_CALL is not usable here: the query may legitimately return
    // ZE_RESULT_ERROR_UNSUPPORTED_FEATURE and ZE2UR_CALL would abort on
    // anything other than success. ZE_CALL_NOCHECK lets the result be
    // inspected manually instead.
    auto errc =
        ZE_CALL_NOCHECK(zeDeviceGetRootDevice, (D->ZeDevice, &RootDev));
    return (errc == ZE_RESULT_SUCCESS && RootDev != nullptr);
  };
  const bool CombinedHierarchy =
      std::any_of(Platform->URDevicesCache.begin(),
                  Platform->URDevicesCache.end(), HasRootAbove);

  for (auto &D : Platform->URDevicesCache) {
    // The cache also holds sub-devices, but urDeviceGet only ever hands out
    // root-devices.
    if (D->isSubDevice())
      continue;

    bool TypeMatches = false;
    switch (DeviceType) {
    case UR_DEVICE_TYPE_ALL:
      TypeMatches = true;
      break;
    case UR_DEVICE_TYPE_GPU:
    case UR_DEVICE_TYPE_DEFAULT:
      TypeMatches = (D->ZeDeviceProperties->type == ZE_DEVICE_TYPE_GPU);
      break;
    case UR_DEVICE_TYPE_CPU:
      TypeMatches = (D->ZeDeviceProperties->type == ZE_DEVICE_TYPE_CPU);
      break;
    case UR_DEVICE_TYPE_FPGA:
      TypeMatches = (D->ZeDeviceProperties->type == ZE_DEVICE_TYPE_FPGA);
      break;
    case UR_DEVICE_TYPE_MCA:
      TypeMatches = (D->ZeDeviceProperties->type == ZE_DEVICE_TYPE_MCA);
      break;
    default:
      UR_LOG(WARN, "Unknown device type");
      break;
    }
    if (!TypeMatches)
      continue;

    // In combined mode a device without the SUBDEVICE flag is a composite
    // device and must not be reported.
    const bool Reportable =
        !CombinedHierarchy || (D->ZeDeviceProperties->flags &
                               ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE) != 0;
    if (!Reportable)
      continue;

    Selected.push_back(D.get());
    // UR_DEVICE_TYPE_DEFAULT yields a single device: stop at the first proper
    // match.
    if (DeviceType == UR_DEVICE_TYPE_DEFAULT)
      break;
  }

  const uint32_t TotalMatched = Selected.size();

  if (Devices) {
    // Never write more handles than the caller made room for.
    const auto ToCopy = (std::min)(TotalMatched, NumEntries);
    std::copy_n(Selected.begin(), ToCopy, Devices);
  }

  if (NumDevices)
    *NumDevices = TotalMatched;

  return UR_RESULT_SUCCESS;
}
179-
18071
uint64_t calculateGlobalMemSize(ur_device_handle_t Device) {
18172
// Cache GlobalMemSize
18273
Device->ZeGlobalMemSize.Compute =
@@ -1462,6 +1353,143 @@ ur_result_t urDeviceGetInfo(
14621353
return UR_RESULT_SUCCESS;
14631354
}
14641355

1356+
/// Retrieves the root devices of a platform that match a device type.
///
/// Sub-devices kept in the platform's device cache are never reported, and
/// composite devices are skipped when ZE_FLAT_DEVICE_HIERARCHY=COMBINED is
/// detected.
///
/// For UR_DEVICE_TYPE_DEFAULT at most one GPU is reported: the discrete GPU
/// with the most compute units or, when no discrete GPU passed the filters,
/// the integrated GPU with the most compute units. Every other device type,
/// including UR_DEVICE_TYPE_GPU, reports all matching devices.
///
/// Returns UR_RESULT_SUCCESS, or the error produced while populating the
/// platform's device cache.
ur_result_t urDeviceGet(
    /// [in] handle of the platform instance
    ur_platform_handle_t Platform,
    /// [in] the type of the devices.
    ur_device_type_t DeviceType,
    /// [in] the number of devices to be added to phDevices. If phDevices is not
    /// NULL then NumEntries should be greater than zero, otherwise
    /// ::UR_RESULT_ERROR_INVALID_SIZE, will be returned.
    uint32_t NumEntries,
    /// [out][optional][range(0, NumEntries)] array of handle of devices. If
    /// NumEntries is less than the number of devices available, then
    /// platform shall only retrieve that number of devices.
    ur_device_handle_t *Devices,
    /// [out][optional] pointer to the number of devices. pNumDevices will be
    /// updated with the total number of devices available.
    uint32_t *NumDevices) {

  auto Res = Platform->populateDeviceCacheIfNeeded();
  if (Res != UR_RESULT_SUCCESS) {
    return Res;
  }

  // Filter available devices based on input DeviceType.
  std::vector<ur_device_handle_t> MatchedDevices;
  std::shared_lock<ur_shared_mutex> Lock(Platform->URDevicesCacheMutex);
  // We need to filter out composite devices when
  // ZE_FLAT_DEVICE_HIERARCHY=COMBINED. We can know if we are in combined
  // mode depending on the return value of zeDeviceGetRootDevice:
  //   - If COMPOSITE, L0 returns cards as devices. Since we filter out
  //     subdevices early, zeDeviceGetRootDevice must return nullptr, because
  //     we only query for root-devices and they don't have any device higher
  //     up in the hierarchy.
  //   - If FLAT, according to L0 spec, zeDeviceGetRootDevice always returns
  //     nullptr in this mode.
  //   - If COMBINED, L0 returns tiles as devices, and zeDeviceGetRootDevice
  //     returns the card containing a given tile.

  // Track best discrete and integrated GPU candidates (device, max compute
  // units). Only used when a single default device has to be chosen.
  std::pair<ur_device_handle_t, uint32_t> GPUDeviceDiscrete = {nullptr, 0};
  std::pair<ur_device_handle_t, uint32_t> GPUDeviceIntegrated = {nullptr, 0};

  // Only UR_DEVICE_TYPE_DEFAULT is narrowed down to one device. An explicit
  // UR_DEVICE_TYPE_GPU query must keep enumerating every GPU, so the decision
  // is taken from DeviceType itself rather than from the shared GPU/DEFAULT
  // switch arm below.
  const bool SelectSingleDefault = (DeviceType == UR_DEVICE_TYPE_DEFAULT);

  bool isCombinedMode =
      std::any_of(Platform->URDevicesCache.begin(),
                  Platform->URDevicesCache.end(), [](const auto &D) {
                    if (D->isSubDevice())
                      return false;
                    ze_device_handle_t RootDev = nullptr;
                    // Query Root Device for root-devices.
                    // We cannot use ZE2UR_CALL because under some circumstances
                    // this call may return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE,
                    // and ZE2UR_CALL will abort because it's not
                    // UR_RESULT_SUCCESS. Instead, we use ZE_CALL_NOCHECK and we
                    // check manually that the result is either
                    // ZE_RESULT_SUCCESS or ZE_RESULT_ERROR_UNSUPPORTED_FEATURE.
                    auto errc = ZE_CALL_NOCHECK(zeDeviceGetRootDevice,
                                                (D->ZeDevice, &RootDev));
                    return (errc == ZE_RESULT_SUCCESS && RootDev != nullptr);
                  });
  for (auto &D : Platform->URDevicesCache) {
    // Only ever return root-devices from urDeviceGet, but the
    // devices cache also keeps sub-devices.
    if (D->isSubDevice())
      continue;

    bool Matched = false;
    switch (DeviceType) {
    case UR_DEVICE_TYPE_ALL:
      Matched = true;
      break;
    case UR_DEVICE_TYPE_GPU:
    case UR_DEVICE_TYPE_DEFAULT:
      Matched = (D->ZeDeviceProperties->type == ZE_DEVICE_TYPE_GPU);
      break;
    case UR_DEVICE_TYPE_CPU:
      Matched = (D->ZeDeviceProperties->type == ZE_DEVICE_TYPE_CPU);
      break;
    case UR_DEVICE_TYPE_FPGA:
      Matched = D->ZeDeviceProperties->type == ZE_DEVICE_TYPE_FPGA;
      break;
    case UR_DEVICE_TYPE_MCA:
      Matched = D->ZeDeviceProperties->type == ZE_DEVICE_TYPE_MCA;
      break;
    default:
      Matched = false;
      UR_LOG(WARN, "Unknown device type");
      break;
    }

    if (Matched) {
      bool isComposite =
          isCombinedMode && (D->ZeDeviceProperties->flags &
                             ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE) == 0;
      if (!isComposite) {
        if (SelectSingleDefault) {
          // Rank the candidate by its compute-unit count. A failed query
          // ranks the device lowest instead of failing the enumeration.
          uint32_t maxComputeUnits = 0;
          ur_result_t UrRet = ur::level_zero::urDeviceGetInfo(
              D.get(), UR_DEVICE_INFO_MAX_COMPUTE_UNITS,
              sizeof(maxComputeUnits), &maxComputeUnits, nullptr);
          maxComputeUnits = (UrRet == UR_RESULT_SUCCESS) ? maxComputeUnits : 0;
          auto &BestGpu =
              D->isIntegrated() ? GPUDeviceIntegrated : GPUDeviceDiscrete;
          // Strict comparison: on a tie the device seen first is kept.
          if (!BestGpu.first || maxComputeUnits > BestGpu.second)
            BestGpu = std::make_pair(D.get(), maxComputeUnits);
        } else {
          MatchedDevices.push_back(D.get());
        }
      }
    }
  }

  // Resolve the single default device outside the loop, once every candidate
  // has been ranked.
  if (SelectSingleDefault) {
    // Prefer discrete GPU over integrated GPU
    if (GPUDeviceDiscrete.first) {
      MatchedDevices = {GPUDeviceDiscrete.first};
    } else if (GPUDeviceIntegrated.first) {
      MatchedDevices = {GPUDeviceIntegrated.first};
    }
  }

  uint32_t ZeDeviceCount = MatchedDevices.size();

  // Never copy more handles than the caller made room for.
  auto N = (std::min)(ZeDeviceCount, NumEntries);
  if (Devices)
    std::copy_n(MatchedDevices.begin(), N, Devices);

  if (NumDevices)
    *NumDevices = ZeDeviceCount;

  return UR_RESULT_SUCCESS;
}
1492+
14651493
bool CopyEngineRequested(const ur_device_handle_t &Device) {
14661494
int LowerCopyQueueIndex = getRangeOfAllowedCopyEngines(Device).first;
14671495
int UpperCopyQueueIndex = getRangeOfAllowedCopyEngines(Device).second;

0 commit comments

Comments
 (0)