@@ -68,115 +68,6 @@ getRangeOfAllowedCopyEngines(const ur_device_handle_t &Device) {
68
68
69
69
namespace ur ::level_zero {
70
70
71
- ur_result_t urDeviceGet (
72
- // / [in] handle of the platform instance
73
- ur_platform_handle_t Platform,
74
- // / [in] the type of the devices.
75
- ur_device_type_t DeviceType,
76
- // / [in] the number of devices to be added to phDevices. If phDevices in not
77
- // / NULL then NumEntries should be greater than zero, otherwise
78
- // / ::UR_RESULT_ERROR_INVALID_SIZE, will be returned.
79
- uint32_t NumEntries,
80
- // / [out][optional][range(0, NumEntries)] array of handle of devices. If
81
- // / NumEntries is less than the number of devices available, then
82
- // / platform shall only retrieve that number of devices.
83
- ur_device_handle_t *Devices,
84
- // / [out][optional] pointer to the number of devices. pNumDevices will be
85
- // / updated with the total number of devices available.
86
- uint32_t *NumDevices) {
87
-
88
- auto Res = Platform->populateDeviceCacheIfNeeded ();
89
- if (Res != UR_RESULT_SUCCESS) {
90
- return Res;
91
- }
92
-
93
- // Filter available devices based on input DeviceType.
94
- std::vector<ur_device_handle_t > MatchedDevices;
95
- std::shared_lock<ur_shared_mutex> Lock (Platform->URDevicesCacheMutex );
96
- // We need to filter out composite devices when
97
- // ZE_FLAT_DEVICE_HIERARCHY=COMBINED. We can know if we are in combined
98
- // mode depending on the return value of zeDeviceGetRootDevice:
99
- // - If COMPOSITE, L0 returns cards as devices. Since we filter out
100
- // subdevices early, zeDeviceGetRootDevice must return nullptr, because we
101
- // only query for root-devices and they don't have any device higher up in
102
- // the hierarchy.
103
- // - If FLAT, according to L0 spec, zeDeviceGetRootDevice always returns
104
- // nullptr in this mode.
105
- // - If COMBINED, L0 returns tiles as devices, and zeDeviceGetRootdevice
106
- // returns the card containing a given tile.
107
- bool isCombinedMode =
108
- std::any_of (Platform->URDevicesCache .begin (),
109
- Platform->URDevicesCache .end (), [](const auto &D) {
110
- if (D->isSubDevice ())
111
- return false ;
112
- ze_device_handle_t RootDev = nullptr ;
113
- // Query Root Device for root-devices.
114
- // We cannot use ZE2UR_CALL because under some circumstances
115
- // this call may return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE,
116
- // and ZE2UR_CALL will abort because it's not
117
- // UR_RESULT_SUCCESS. Instead, we use ZE_CALL_NOCHECK and we
118
- // check manually that the result is either
119
- // ZE_RESULT_SUCCESS or ZE_RESULT_ERROR_UNSUPPORTED_FEATURE.
120
- auto errc = ZE_CALL_NOCHECK (zeDeviceGetRootDevice,
121
- (D->ZeDevice , &RootDev));
122
- return (errc == ZE_RESULT_SUCCESS && RootDev != nullptr );
123
- });
124
- for (auto &D : Platform->URDevicesCache ) {
125
- // Only ever return root-devices from urDeviceGet, but the
126
- // devices cache also keeps sub-devices.
127
- if (D->isSubDevice ())
128
- continue ;
129
-
130
- bool Matched = false ;
131
- switch (DeviceType) {
132
- case UR_DEVICE_TYPE_ALL:
133
- Matched = true ;
134
- break ;
135
- case UR_DEVICE_TYPE_GPU:
136
- case UR_DEVICE_TYPE_DEFAULT:
137
- Matched = (D->ZeDeviceProperties ->type == ZE_DEVICE_TYPE_GPU);
138
- break ;
139
- case UR_DEVICE_TYPE_CPU:
140
- Matched = (D->ZeDeviceProperties ->type == ZE_DEVICE_TYPE_CPU);
141
- break ;
142
- case UR_DEVICE_TYPE_FPGA:
143
- Matched = D->ZeDeviceProperties ->type == ZE_DEVICE_TYPE_FPGA;
144
- break ;
145
- case UR_DEVICE_TYPE_MCA:
146
- Matched = D->ZeDeviceProperties ->type == ZE_DEVICE_TYPE_MCA;
147
- break ;
148
- default :
149
- Matched = false ;
150
- UR_LOG (WARN, " Unknown device type" );
151
- break ;
152
- }
153
-
154
- if (Matched) {
155
- bool isComposite =
156
- isCombinedMode && (D->ZeDeviceProperties ->flags &
157
- ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE) == 0 ;
158
- if (!isComposite) {
159
- MatchedDevices.push_back (D.get ());
160
- // For UR_DEVICE_TYPE_DEFAULT only a single device should be returned,
161
- // so exit the loop after first proper match.
162
- if (DeviceType == UR_DEVICE_TYPE_DEFAULT)
163
- break ;
164
- }
165
- }
166
- }
167
-
168
- uint32_t ZeDeviceCount = MatchedDevices.size ();
169
-
170
- auto N = (std::min)(ZeDeviceCount, NumEntries);
171
- if (Devices)
172
- std::copy_n (MatchedDevices.begin (), N, Devices);
173
-
174
- if (NumDevices)
175
- *NumDevices = ZeDeviceCount;
176
-
177
- return UR_RESULT_SUCCESS;
178
- }
179
-
180
71
uint64_t calculateGlobalMemSize (ur_device_handle_t Device) {
181
72
// Cache GlobalMemSize
182
73
Device->ZeGlobalMemSize .Compute =
@@ -1462,6 +1353,143 @@ ur_result_t urDeviceGetInfo(
1462
1353
return UR_RESULT_SUCCESS;
1463
1354
}
1464
1355
1356
+ ur_result_t urDeviceGet (
1357
+ // / [in] handle of the platform instance
1358
+ ur_platform_handle_t Platform,
1359
+ // / [in] the type of the devices.
1360
+ ur_device_type_t DeviceType,
1361
+ // / [in] the number of devices to be added to phDevices. If phDevices in not
1362
+ // / NULL then NumEntries should be greater than zero, otherwise
1363
+ // / ::UR_RESULT_ERROR_INVALID_SIZE, will be returned.
1364
+ uint32_t NumEntries,
1365
+ // / [out][optional][range(0, NumEntries)] array of handle of devices. If
1366
+ // / NumEntries is less than the number of devices available, then
1367
+ // / platform shall only retrieve that number of devices.
1368
+ ur_device_handle_t *Devices,
1369
+ // / [out][optional] pointer to the number of devices. pNumDevices will be
1370
+ // / updated with the total number of devices available.
1371
+ uint32_t *NumDevices) {
1372
+
1373
+ auto Res = Platform->populateDeviceCacheIfNeeded ();
1374
+ if (Res != UR_RESULT_SUCCESS) {
1375
+ return Res;
1376
+ }
1377
+
1378
+ // Filter available devices based on input DeviceType.
1379
+ std::vector<ur_device_handle_t > MatchedDevices;
1380
+ std::shared_lock<ur_shared_mutex> Lock (Platform->URDevicesCacheMutex );
1381
+ // We need to filter out composite devices when
1382
+ // ZE_FLAT_DEVICE_HIERARCHY=COMBINED. We can know if we are in combined
1383
+ // mode depending on the return value of zeDeviceGetRootDevice:
1384
+ // - If COMPOSITE, L0 returns cards as devices. Since we filter out
1385
+ // subdevices early, zeDeviceGetRootDevice must return nullptr, because we
1386
+ // only query for root-devices and they don't have any device higher up in
1387
+ // the hierarchy.
1388
+ // - If FLAT, according to L0 spec, zeDeviceGetRootDevice always returns
1389
+ // nullptr in this mode.
1390
+ // - If COMBINED, L0 returns tiles as devices, and zeDeviceGetRootdevice
1391
+ // returns the card containing a given tile.
1392
+
1393
+ // Track best discrete and integrated GPU candidates (device, max compute
1394
+ // units)
1395
+ std::pair<ur_device_handle_t , uint32_t > GPUDeviceDiscrete = {nullptr , 0 };
1396
+ std::pair<ur_device_handle_t , uint32_t > GPUDeviceIntegrated = {nullptr , 0 };
1397
+ bool Device_Default_GPU = false ;
1398
+
1399
+ bool isCombinedMode =
1400
+ std::any_of (Platform->URDevicesCache .begin (),
1401
+ Platform->URDevicesCache .end (), [](const auto &D) {
1402
+ if (D->isSubDevice ())
1403
+ return false ;
1404
+ ze_device_handle_t RootDev = nullptr ;
1405
+ // Query Root Device for root-devices.
1406
+ // We cannot use ZE2UR_CALL because under some circumstances
1407
+ // this call may return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE,
1408
+ // and ZE2UR_CALL will abort because it's not
1409
+ // UR_RESULT_SUCCESS. Instead, we use ZE_CALL_NOCHECK and we
1410
+ // check manually that the result is either
1411
+ // ZE_RESULT_SUCCESS or ZE_RESULT_ERROR_UNSUPPORTED_FEATURE.
1412
+ auto errc = ZE_CALL_NOCHECK (zeDeviceGetRootDevice,
1413
+ (D->ZeDevice , &RootDev));
1414
+ return (errc == ZE_RESULT_SUCCESS && RootDev != nullptr );
1415
+ });
1416
+ for (auto &D : Platform->URDevicesCache ) {
1417
+ // Only ever return root-devices from urDeviceGet, but the
1418
+ // devices cache also keeps sub-devices.
1419
+ if (D->isSubDevice ())
1420
+ continue ;
1421
+
1422
+ bool Matched = false ;
1423
+ switch (DeviceType) {
1424
+ case UR_DEVICE_TYPE_ALL:
1425
+ Matched = true ;
1426
+ break ;
1427
+ case UR_DEVICE_TYPE_GPU:
1428
+ case UR_DEVICE_TYPE_DEFAULT:
1429
+ Matched = (D->ZeDeviceProperties ->type == ZE_DEVICE_TYPE_GPU);
1430
+ Device_Default_GPU = true ;
1431
+ break ;
1432
+ case UR_DEVICE_TYPE_CPU:
1433
+ Matched = (D->ZeDeviceProperties ->type == ZE_DEVICE_TYPE_CPU);
1434
+ break ;
1435
+ case UR_DEVICE_TYPE_FPGA:
1436
+ Matched = D->ZeDeviceProperties ->type == ZE_DEVICE_TYPE_FPGA;
1437
+ break ;
1438
+ case UR_DEVICE_TYPE_MCA:
1439
+ Matched = D->ZeDeviceProperties ->type == ZE_DEVICE_TYPE_MCA;
1440
+ break ;
1441
+ default :
1442
+ Matched = false ;
1443
+ UR_LOG (WARN, " Unknown device type" );
1444
+ break ;
1445
+ }
1446
+
1447
+ if (Matched) {
1448
+ bool isComposite =
1449
+ isCombinedMode && (D->ZeDeviceProperties ->flags &
1450
+ ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE) == 0 ;
1451
+ if (!isComposite) {
1452
+ // In case of DeviceType is GPU or DEFAULT, pick only the most powerful
1453
+ // device.
1454
+ if (Device_Default_GPU) {
1455
+ uint32_t maxComputeUnits = 0 ;
1456
+ ur_result_t UrRet = ur::level_zero::urDeviceGetInfo (
1457
+ D.get (), UR_DEVICE_INFO_MAX_COMPUTE_UNITS,
1458
+ sizeof (maxComputeUnits), &maxComputeUnits, nullptr );
1459
+ maxComputeUnits = (UrRet == UR_RESULT_SUCCESS) ? maxComputeUnits : 0 ;
1460
+ auto &BestGpu =
1461
+ D->isIntegrated () ? GPUDeviceIntegrated : GPUDeviceDiscrete;
1462
+ if (!BestGpu.first || maxComputeUnits > BestGpu.second )
1463
+ BestGpu = std::make_pair (D.get (), maxComputeUnits);
1464
+ } else {
1465
+ MatchedDevices.push_back (D.get ());
1466
+ }
1467
+ }
1468
+ }
1469
+ }
1470
+
1471
+ // Handle GPU/DEFAULT device selection outside the loop
1472
+ if (Device_Default_GPU) {
1473
+ // Prefer discrete GPU over integrated GPU
1474
+ if (GPUDeviceDiscrete.first ) {
1475
+ MatchedDevices = {GPUDeviceDiscrete.first };
1476
+ } else if (GPUDeviceIntegrated.first ) {
1477
+ MatchedDevices = {GPUDeviceIntegrated.first };
1478
+ }
1479
+ }
1480
+
1481
+ uint32_t ZeDeviceCount = MatchedDevices.size ();
1482
+
1483
+ auto N = (std::min)(ZeDeviceCount, NumEntries);
1484
+ if (Devices)
1485
+ std::copy_n (MatchedDevices.begin (), N, Devices);
1486
+
1487
+ if (NumDevices)
1488
+ *NumDevices = ZeDeviceCount;
1489
+
1490
+ return UR_RESULT_SUCCESS;
1491
+ }
1492
+
1465
1493
bool CopyEngineRequested (const ur_device_handle_t &Device) {
1466
1494
int LowerCopyQueueIndex = getRangeOfAllowedCopyEngines (Device).first ;
1467
1495
int UpperCopyQueueIndex = getRangeOfAllowedCopyEngines (Device).second ;
0 commit comments