From f23e97dc175fdc13684ebcc5ffb942652e814592 Mon Sep 17 00:00:00 2001 From: "Kornev, Nikita" Date: Wed, 10 Sep 2025 20:52:58 +0200 Subject: [PATCH 1/4] [SYCL][UR] Support sycl_ext_oneapi_clock Spec: https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/proposed/sycl_ext_oneapi_clock.asciidoc --- sycl/source/detail/device_impl.hpp | 13 ++++---- .../detail/ur_device_info_ret_types.inc | 3 ++ sycl/test-e2e/Experimental/clock.cpp | 3 ++ unified-runtime/include/ur_api.h | 9 ++++++ .../source/adapters/opencl/device.cpp | 30 +++++++++++++++++++ 5 files changed, 52 insertions(+), 6 deletions(-) diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index 38214254595c6..7958f384c09f3 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -1580,16 +1580,17 @@ class device_impl : public std::enable_shared_from_this<device_impl> { .value_or(0); } CASE(ext_oneapi_clock_sub_group) { - // Will be updated in a follow-up UR patch. - return false; + return get_info_impl_nocheck<UR_DEVICE_INFO_CLOCK_SUB_GROUP_SUPPORT_EXP>() + .value_or(0); } CASE(ext_oneapi_clock_work_group) { - // Will be updated in a follow-up UR patch. - return false; + return get_info_impl_nocheck< + UR_DEVICE_INFO_CLOCK_WORK_GROUP_SUPPORT_EXP>() + .value_or(0); } CASE(ext_oneapi_clock_device) { - // Will be updated in a follow-up UR patch. - return false; + return get_info_impl_nocheck<UR_DEVICE_INFO_CLOCK_DEVICE_SUPPORT_EXP>() + .value_or(0); } else { return false; // This device aspect has not been implemented yet. 
diff --git a/sycl/source/detail/ur_device_info_ret_types.inc b/sycl/source/detail/ur_device_info_ret_types.inc index e1e724262b85f..11336b2ff4e8c 100644 --- a/sycl/source/detail/ur_device_info_ret_types.inc +++ b/sycl/source/detail/ur_device_info_ret_types.inc @@ -193,4 +193,7 @@ MAP(UR_DEVICE_INFO_MIPMAP_SUPPORT_EXP, ur_bool_t) MAP(UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP, ur_bool_t) MAP(UR_DEVICE_INFO_KERNEL_LAUNCH_CAPABILITIES, ur_kernel_launch_properties_flags_t) MAP(UR_DEVICE_INFO_MEMORY_EXPORT_EXPORTABLE_DEVICE_MEM_EXP, ur_bool_t) +MAP(UR_DEVICE_INFO_CLOCK_SUB_GROUP_SUPPORT_EXP, ur_bool_t) +MAP(UR_DEVICE_INFO_CLOCK_WORK_GROUP_SUPPORT_EXP, ur_bool_t) +MAP(UR_DEVICE_INFO_CLOCK_DEVICE_SUPPORT_EXP, ur_bool_t) // clang-format on diff --git a/sycl/test-e2e/Experimental/clock.cpp b/sycl/test-e2e/Experimental/clock.cpp index 604900d87294a..692148b9904d2 100644 --- a/sycl/test-e2e/Experimental/clock.cpp +++ b/sycl/test-e2e/Experimental/clock.cpp @@ -1,3 +1,6 @@ +// UNSUPPORTED: cpu +// UNSUPPORTED-INTENDED: Bug in CPU RT. Waiting for the new version. + // REQUIRES: aspect-usm_shared_allocations // REQUIRES: aspect-ext_oneapi_clock_sub_group || aspect-ext_oneapi_clock_work_group || aspect-ext_oneapi_clock_device // RUN: %{build} -o %t.out diff --git a/unified-runtime/include/ur_api.h b/unified-runtime/include/ur_api.h index f78714b4e06aa..13516fa6f46b3 100644 --- a/unified-runtime/include/ur_api.h +++ b/unified-runtime/include/ur_api.h @@ -2429,6 +2429,15 @@ typedef enum ur_device_info_t { /// [::ur_bool_t] returns true if the device supports enqueueing of /// allocations and frees. UR_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_SUPPORT_EXP = 0x2050, + /// [::ur_bool_t] returns true if the device supports sampling values from the + /// sub-group clock. + UR_DEVICE_INFO_CLOCK_SUB_GROUP_SUPPORT_EXP = 0x2051, + /// [::ur_bool_t] returns true if the device supports sampling values from the + /// work-group clock. 
+ UR_DEVICE_INFO_CLOCK_WORK_GROUP_SUPPORT_EXP = 0x2052, + /// [::ur_bool_t] returns true if the device supports sampling values from the + /// device clock. + UR_DEVICE_INFO_CLOCK_DEVICE_SUPPORT_EXP = 0x2053, /// [::ur_bool_t] Returns true if the device supports the USM P2P /// experimental feature. UR_DEVICE_INFO_USM_P2P_SUPPORT_EXP = 0x4000, diff --git a/unified-runtime/source/adapters/opencl/device.cpp b/unified-runtime/source/adapters/opencl/device.cpp index 4f697b05b5c88..d944789acfebe 100644 --- a/unified-runtime/source/adapters/opencl/device.cpp +++ b/unified-runtime/source/adapters/opencl/device.cpp @@ -1480,6 +1480,36 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return ReturnValue(nodeMask); } + case UR_DEVICE_INFO_CLOCK_SUB_GROUP_SUPPORT_EXP: + case UR_DEVICE_INFO_CLOCK_WORK_GROUP_SUPPORT_EXP: + case UR_DEVICE_INFO_CLOCK_DEVICE_SUPPORT_EXP: { + bool Supported = false; + size_t ExtSize = 0; + + CL_RETURN_ON_FAILURE(clGetDeviceInfo( + hDevice->CLDevice, CL_DEVICE_EXTENSIONS, 0, nullptr, &ExtSize)); + std::string ExtStr(ExtSize, '\0'); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->CLDevice, + CL_DEVICE_EXTENSIONS, ExtSize, + ExtStr.data(), nullptr)); + + if (ExtStr.find("cl_khr_kernel_clock") != std::string::npos) { + cl_device_kernel_clock_capabilities_khr caps = 0; + + CL_RETURN_ON_FAILURE(clGetDeviceInfo( + hDevice->CLDevice, CL_DEVICE_KERNEL_CLOCK_CAPABILITIES_KHR, + sizeof(cl_device_kernel_clock_capabilities_khr), &caps, nullptr)); + + if ((propName == UR_DEVICE_INFO_CLOCK_SUB_GROUP_SUPPORT_EXP && + (caps & CL_DEVICE_KERNEL_CLOCK_SCOPE_SUB_GROUP_KHR)) || + (propName == UR_DEVICE_INFO_CLOCK_WORK_GROUP_SUPPORT_EXP && + (caps & CL_DEVICE_KERNEL_CLOCK_SCOPE_WORK_GROUP_KHR)) || + (propName == UR_DEVICE_INFO_CLOCK_DEVICE_SUPPORT_EXP && + (caps & CL_DEVICE_KERNEL_CLOCK_SCOPE_DEVICE_KHR))) + Supported = true; + } + return ReturnValue(Supported); + } // TODO: We can't query to check if these are supported, they will need 
to be // manually updated if support is ever implemented. case UR_DEVICE_INFO_KERNEL_SET_SPECIALIZATION_CONSTANTS: From 20ee5549ba12db402e3c7c0b2fa7ae9642188edb Mon Sep 17 00:00:00 2001 From: "Kornev, Nikita" Date: Fri, 12 Sep 2025 17:37:48 +0200 Subject: [PATCH 2/4] format --- unified-runtime/source/adapters/opencl/device.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/unified-runtime/source/adapters/opencl/device.cpp b/unified-runtime/source/adapters/opencl/device.cpp index d944789acfebe..eac2c9fe0bf2c 100644 --- a/unified-runtime/source/adapters/opencl/device.cpp +++ b/unified-runtime/source/adapters/opencl/device.cpp @@ -1501,11 +1501,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, sizeof(cl_device_kernel_clock_capabilities_khr), &caps, nullptr)); if ((propName == UR_DEVICE_INFO_CLOCK_SUB_GROUP_SUPPORT_EXP && - (caps & CL_DEVICE_KERNEL_CLOCK_SCOPE_SUB_GROUP_KHR)) || + (caps & CL_DEVICE_KERNEL_CLOCK_SCOPE_SUB_GROUP_KHR)) || (propName == UR_DEVICE_INFO_CLOCK_WORK_GROUP_SUPPORT_EXP && - (caps & CL_DEVICE_KERNEL_CLOCK_SCOPE_WORK_GROUP_KHR)) || + (caps & CL_DEVICE_KERNEL_CLOCK_SCOPE_WORK_GROUP_KHR)) || (propName == UR_DEVICE_INFO_CLOCK_DEVICE_SUPPORT_EXP && - (caps & CL_DEVICE_KERNEL_CLOCK_SCOPE_DEVICE_KHR))) + (caps & CL_DEVICE_KERNEL_CLOCK_SCOPE_DEVICE_KHR))) Supported = true; } return ReturnValue(Supported); From 8d32f5933d4056b8ed0fd95ac8f1b7c458124717 Mon Sep 17 00:00:00 2001 From: "Kornev, Nikita" Date: Wed, 17 Sep 2025 13:29:13 +0200 Subject: [PATCH 3/4] upd test --- sycl/test-e2e/Experimental/clock.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/sycl/test-e2e/Experimental/clock.cpp b/sycl/test-e2e/Experimental/clock.cpp index 692148b9904d2..604900d87294a 100644 --- a/sycl/test-e2e/Experimental/clock.cpp +++ b/sycl/test-e2e/Experimental/clock.cpp @@ -1,6 +1,3 @@ -// UNSUPPORTED: cpu -// UNSUPPORTED-INTENDED: Bug in CPU RT. Waiting for the new version. 
- // REQUIRES: aspect-usm_shared_allocations // REQUIRES: aspect-ext_oneapi_clock_sub_group || aspect-ext_oneapi_clock_work_group || aspect-ext_oneapi_clock_device // RUN: %{build} -o %t.out From 72ac5edf3c1388d3d0ac4cece71d10300ff9585b Mon Sep 17 00:00:00 2001 From: "Kornev, Nikita" Date: Wed, 17 Sep 2025 16:57:33 +0200 Subject: [PATCH 4/4] add feature & generate source --- unified-runtime/include/ur_api.h | 18 +++---- unified-runtime/include/ur_print.hpp | 48 +++++++++++++++++ unified-runtime/scripts/core/EXP-CLOCK.rst | 60 ++++++++++++++++++++++ unified-runtime/scripts/core/exp-clock.yml | 31 +++++++++++ unified-runtime/tools/urinfo/urinfo.hpp | 8 +++ 5 files changed, 156 insertions(+), 9 deletions(-) create mode 100644 unified-runtime/scripts/core/EXP-CLOCK.rst create mode 100644 unified-runtime/scripts/core/exp-clock.yml diff --git a/unified-runtime/include/ur_api.h b/unified-runtime/include/ur_api.h index 13516fa6f46b3..b3874c8cb4490 100644 --- a/unified-runtime/include/ur_api.h +++ b/unified-runtime/include/ur_api.h @@ -2429,15 +2429,15 @@ typedef enum ur_device_info_t { /// [::ur_bool_t] returns true if the device supports enqueueing of /// allocations and frees. UR_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_SUPPORT_EXP = 0x2050, - /// [::ur_bool_t] returns true if the device supports sampling values from the - /// sub-group clock. - UR_DEVICE_INFO_CLOCK_SUB_GROUP_SUPPORT_EXP = 0x2051, - /// [::ur_bool_t] returns true if the device supports sampling values from the - /// work-group clock. - UR_DEVICE_INFO_CLOCK_WORK_GROUP_SUPPORT_EXP = 0x2052, - /// [::ur_bool_t] returns true if the device supports sampling values from the - /// device clock. - UR_DEVICE_INFO_CLOCK_DEVICE_SUPPORT_EXP = 0x2053, + /// [::ur_bool_t] returns true if the device supports sampling values from + /// the sub-group clock. + UR_DEVICE_INFO_CLOCK_SUB_GROUP_SUPPORT_EXP = 0x2060, + /// [::ur_bool_t] returns true if the device supports sampling values from + /// the work-group clock. 
+ UR_DEVICE_INFO_CLOCK_WORK_GROUP_SUPPORT_EXP = 0x2061, + /// [::ur_bool_t] returns true if the device supports sampling values from + /// the device clock. + UR_DEVICE_INFO_CLOCK_DEVICE_SUPPORT_EXP = 0x2062, /// [::ur_bool_t] Returns true if the device supports the USM P2P /// experimental feature. UR_DEVICE_INFO_USM_P2P_SUPPORT_EXP = 0x4000, diff --git a/unified-runtime/include/ur_print.hpp b/unified-runtime/include/ur_print.hpp index 15c50dd0eb479..62f653a640e08 100644 --- a/unified-runtime/include/ur_print.hpp +++ b/unified-runtime/include/ur_print.hpp @@ -3119,6 +3119,15 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) { case UR_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_SUPPORT_EXP: os << "UR_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_SUPPORT_EXP"; break; + case UR_DEVICE_INFO_CLOCK_SUB_GROUP_SUPPORT_EXP: + os << "UR_DEVICE_INFO_CLOCK_SUB_GROUP_SUPPORT_EXP"; + break; + case UR_DEVICE_INFO_CLOCK_WORK_GROUP_SUPPORT_EXP: + os << "UR_DEVICE_INFO_CLOCK_WORK_GROUP_SUPPORT_EXP"; + break; + case UR_DEVICE_INFO_CLOCK_DEVICE_SUPPORT_EXP: + os << "UR_DEVICE_INFO_CLOCK_DEVICE_SUPPORT_EXP"; + break; case UR_DEVICE_INFO_USM_P2P_SUPPORT_EXP: os << "UR_DEVICE_INFO_USM_P2P_SUPPORT_EXP"; break; @@ -5257,6 +5266,45 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, os << ")"; } break; + case UR_DEVICE_INFO_CLOCK_SUB_GROUP_SUPPORT_EXP: { + const ur_bool_t *tptr = (const ur_bool_t *)ptr; + if (sizeof(ur_bool_t) > size) { + os << "invalid size (is: " << size + << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; + case UR_DEVICE_INFO_CLOCK_WORK_GROUP_SUPPORT_EXP: { + const ur_bool_t *tptr = (const ur_bool_t *)ptr; + if (sizeof(ur_bool_t) > size) { + os << "invalid size (is: " << size + << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; 
+ + os << ")"; + } break; + case UR_DEVICE_INFO_CLOCK_DEVICE_SUPPORT_EXP: { + const ur_bool_t *tptr = (const ur_bool_t *)ptr; + if (sizeof(ur_bool_t) > size) { + os << "invalid size (is: " << size + << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; case UR_DEVICE_INFO_USM_P2P_SUPPORT_EXP: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { diff --git a/unified-runtime/scripts/core/EXP-CLOCK.rst b/unified-runtime/scripts/core/EXP-CLOCK.rst new file mode 100644 index 0000000000000..327e6840f6920 --- /dev/null +++ b/unified-runtime/scripts/core/EXP-CLOCK.rst @@ -0,0 +1,60 @@ +<% + OneApi=tags['$OneApi'] + x=tags['$x'] + X=x.upper() +%> + +.. _experimental-clock: + +================================================================================ +Clock +================================================================================ + +.. warning:: + + Experimental features: + + * May be replaced, updated, or removed at any time. + * Do not require maintaining API/ABI stability of their own additions over + time. + * Do not require conformance testing of their own additions. + + +Motivation +-------------------------------------------------------------------------------- +This experimental extension enables the sycl_ext_oneapi_clock feature: +https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/proposed/sycl_ext_oneapi_clock.asciidoc +It introduces descriptors to query sub-group/work-group/device clock support. 
+ +API +-------------------------------------------------------------------------------- + +Enums +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +* ${x}_device_info_t + * ${X}_DEVICE_INFO_CLOCK_SUB_GROUP_SUPPORT_EXP + * ${X}_DEVICE_INFO_CLOCK_WORK_GROUP_SUPPORT_EXP + * ${X}_DEVICE_INFO_CLOCK_DEVICE_SUPPORT_EXP + +Changelog +-------------------------------------------------------------------------------- + ++-----------+------------------------+ +| Revision | Changes | ++===========+========================+ +| 1.0 | Initial Draft | ++-----------+------------------------+ + + +Support +-------------------------------------------------------------------------------- + +Adapters which support this experimental feature *must* return ${X}_RESULT_SUCCESS from +the ${x}DeviceGetInfo call with the new ${X}_DEVICE_INFO_CLOCK_SUB_GROUP_SUPPORT_EXP, +${X}_DEVICE_INFO_CLOCK_WORK_GROUP_SUPPORT_EXP or ${X}_DEVICE_INFO_CLOCK_DEVICE_SUPPORT_EXP +device descriptors. + +Contributors +-------------------------------------------------------------------------------- + +* Kornev, Nikita `nikita.kornev@intel.com <nikita.kornev@intel.com>`_ diff --git a/unified-runtime/scripts/core/exp-clock.yml b/unified-runtime/scripts/core/exp-clock.yml new file mode 100644 index 0000000000000..323c14135d1e5 --- /dev/null +++ b/unified-runtime/scripts/core/exp-clock.yml @@ -0,0 +1,31 @@ +# +# Copyright (C) 2025 Intel Corporation +# +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +# Exceptions. 
+# See LICENSE.TXT +# +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# See YaML.md for syntax definition +# +--- #-------------------------------------------------------------------------- +type: header +desc: "Intel $OneApi Unified Runtime Experimental APIs for Clock" +ordinal: "99" +--- #-------------------------------------------------------------------------- +type: enum +extend: true +typed_etors: true +desc: "Extension enums for $x_device_info_t to support the clock extension." +name: $x_device_info_t +etors: + - name: CLOCK_SUB_GROUP_SUPPORT_EXP + value: "0x2060" + desc: "[$x_bool_t] returns true if the device supports sampling values from the sub-group clock." + - name: CLOCK_WORK_GROUP_SUPPORT_EXP + value: "0x2061" + desc: "[$x_bool_t] returns true if the device supports sampling values from the work-group clock." + - name: CLOCK_DEVICE_SUPPORT_EXP + value: "0x2062" + desc: "[$x_bool_t] returns true if the device supports sampling values from the device clock." diff --git a/unified-runtime/tools/urinfo/urinfo.hpp b/unified-runtime/tools/urinfo/urinfo.hpp index b08661787cccf..3407c57f847d7 100644 --- a/unified-runtime/tools/urinfo/urinfo.hpp +++ b/unified-runtime/tools/urinfo/urinfo.hpp @@ -448,6 +448,14 @@ inline void printDeviceInfos(ur_device_handle_t hDevice, printDeviceInfo(hDevice, UR_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_SUPPORT_EXP); std::cout << prefix; + printDeviceInfo(hDevice, + UR_DEVICE_INFO_CLOCK_SUB_GROUP_SUPPORT_EXP); + std::cout << prefix; + printDeviceInfo(hDevice, + UR_DEVICE_INFO_CLOCK_WORK_GROUP_SUPPORT_EXP); + std::cout << prefix; + printDeviceInfo(hDevice, UR_DEVICE_INFO_CLOCK_DEVICE_SUPPORT_EXP); + std::cout << prefix; printDeviceInfo(hDevice, UR_DEVICE_INFO_USM_P2P_SUPPORT_EXP); std::cout << prefix; printDeviceInfo(hDevice,