From eb7e1b53b65fe5d0f0faa70e9cd0f6604a8e0ab2 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Mon, 18 Aug 2025 16:45:56 +0200 Subject: [PATCH] [SYCL] Fix and re-enable bindless image copy tests on L0 --- sycl/source/detail/cg.hpp | 9 +- sycl/source/detail/handler_impl.hpp | 1 + sycl/source/detail/memory_manager.cpp | 10 +- sycl/source/detail/memory_manager.hpp | 6 +- sycl/source/detail/scheduler/commands.cpp | 5 +- sycl/source/handler.cpp | 127 +++++++----- .../copies/copy_subregion_1D.cpp | 1 - .../copies/copy_subregion_2D.cpp | 3 +- .../copies/device_to_device_copy.cpp | 1 - .../copies/device_to_device_pitched.cpp | 1 - unified-runtime/include/ur_api.h | 23 +++ unified-runtime/include/ur_ddi.h | 4 +- unified-runtime/include/ur_print.h | 10 + unified-runtime/include/ur_print.hpp | 32 ++++ .../scripts/core/exp-bindless-images.yml | 17 ++ .../source/adapters/cuda/image.cpp | 5 +- unified-runtime/source/adapters/hip/image.cpp | 5 +- .../source/adapters/level_zero/image.cpp | 22 ++- .../adapters/level_zero/image_common.cpp | 180 +++++++++--------- .../adapters/level_zero/image_common.hpp | 1 + .../level_zero/ur_interface_loader.hpp | 6 +- .../level_zero/v2/command_list_manager.cpp | 11 +- .../level_zero/v2/command_list_manager.hpp | 6 +- .../adapters/level_zero/v2/queue_api.cpp | 10 +- .../adapters/level_zero/v2/queue_api.hpp | 5 +- .../v2/queue_immediate_in_order.hpp | 9 +- .../v2/queue_immediate_out_of_order.hpp | 9 +- .../source/adapters/mock/ur_mockddi.cpp | 4 + .../source/adapters/native_cpu/image.cpp | 1 + .../source/adapters/opencl/image.cpp | 1 + .../loader/layers/tracing/ur_trcddi.cpp | 8 +- .../loader/layers/validation/ur_valddi.cpp | 10 +- unified-runtime/source/loader/loader.def.in | 1 + unified-runtime/source/loader/loader.map.in | 1 + unified-runtime/source/loader/ur_ldrddi.cpp | 7 +- unified-runtime/source/loader/ur_libapi.cpp | 9 +- unified-runtime/source/loader/ur_print.cpp | 9 + unified-runtime/source/ur_api.cpp | 5 + 38 files changed, 379 insertions(+), 196 deletions(-) diff --git a/sycl/source/detail/cg.hpp b/sycl/source/detail/cg.hpp index 5f3229c3ed798..5426d31689f82 100644 --- a/sycl/source/detail/cg.hpp +++ b/sycl/source/detail/cg.hpp @@ -600,6 +600,7 @@ class CGCopyImage : public CG { ur_image_format_t MSrcImageFormat; ur_image_format_t MDstImageFormat; ur_exp_image_copy_flags_t MImageCopyFlags; + ur_exp_image_copy_input_types_t MImageInputTypes; ur_rect_offset_t MSrcOffset; ur_rect_offset_t MDstOffset; ur_rect_region_t MCopyExtent; @@ -609,14 +610,15 @@ class CGCopyImage : public CG { ur_image_desc_t DstImageDesc, ur_image_format_t SrcImageFormat, ur_image_format_t DstImageFormat, ur_exp_image_copy_flags_t ImageCopyFlags, + ur_exp_image_copy_input_types_t ImageInputTypes, ur_rect_offset_t SrcOffset, ur_rect_offset_t DstOffset, ur_rect_region_t CopyExtent, CG::StorageInitHelper CGData, detail::code_location loc = {}) : CG(CGType::CopyImage, std::move(CGData), std::move(loc)), MSrc(Src), MDst(Dst), MSrcImageDesc(SrcImageDesc), MDstImageDesc(DstImageDesc), MSrcImageFormat(SrcImageFormat), MDstImageFormat(DstImageFormat), - MImageCopyFlags(ImageCopyFlags), MSrcOffset(SrcOffset), - MDstOffset(DstOffset), MCopyExtent(CopyExtent) {} + MImageCopyFlags(ImageCopyFlags), MImageInputTypes(ImageInputTypes), + MSrcOffset(SrcOffset), MDstOffset(DstOffset), MCopyExtent(CopyExtent) {} void *getSrc() const { return MSrc; } void *getDst() const { return MDst; } @@ -625,6 +627,9 @@ class CGCopyImage : public CG { ur_image_format_t getSrcFormat() const { return MSrcImageFormat; } ur_image_format_t getDstFormat() const { return MDstImageFormat; } ur_exp_image_copy_flags_t getCopyFlags() const { return MImageCopyFlags; } + ur_exp_image_copy_input_types_t getCopyInputTypes() const { + return MImageInputTypes; + } ur_rect_offset_t getSrcOffset() const { return MSrcOffset; } ur_rect_offset_t getDstOffset() const { return MDstOffset; } ur_rect_region_t getCopyExtent() const { return MCopyExtent; } diff --git a/sycl/source/detail/handler_impl.hpp b/sycl/source/detail/handler_impl.hpp index f62bedc72d553..418be06218299 100644 --- a/sycl/source/detail/handler_impl.hpp +++ b/sycl/source/detail/handler_impl.hpp @@ -120,6 +120,7 @@ class handler_impl { ur_image_format_t MSrcImageFormat = {}; ur_image_format_t MDstImageFormat = {}; ur_exp_image_copy_flags_t MImageCopyFlags = {}; + ur_exp_image_copy_input_types_t MImageCopyInputTypes = {}; ur_rect_offset_t MSrcOffset = {}; ur_rect_offset_t MDestOffset = {}; diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index b0f41907b6f3a..f3bab09e2ca33 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -1570,8 +1570,10 @@ void MemoryManager::copy_image_bindless( queue_impl &Queue, const void *Src, void *Dst, const ur_image_desc_t &SrcDesc, const ur_image_desc_t &DstDesc, const ur_image_format_t &SrcFormat, const ur_image_format_t &DstFormat, - const ur_exp_image_copy_flags_t Flags, ur_rect_offset_t SrcOffset, - ur_rect_offset_t DstOffset, ur_rect_region_t CopyExtent, + const ur_exp_image_copy_flags_t Flags, + const ur_exp_image_copy_input_types_t InputTypes, + ur_rect_offset_t SrcOffset, ur_rect_offset_t DstOffset, + ur_rect_region_t CopyExtent, const std::vector &DepEvents, ur_event_handle_t *OutEvent) { assert((Flags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE || @@ -1594,8 +1596,8 @@ void MemoryManager::copy_image_bindless( Adapter.call( Queue.getHandleRef(), Src, Dst, &SrcDesc, &DstDesc, &SrcFormat, - &DstFormat, &CopyRegion, Flags, DepEvents.size(), DepEvents.data(), - OutEvent); + &DstFormat, &CopyRegion, Flags, InputTypes, DepEvents.size(), + DepEvents.data(), OutEvent); } } // namespace detail diff --git a/sycl/source/detail/memory_manager.hpp b/sycl/source/detail/memory_manager.hpp index 02b0c7d673433..6a4f986c2a840 100644 --- a/sycl/source/detail/memory_manager.hpp +++ b/sycl/source/detail/memory_manager.hpp @@ -264,8 +264,10 @@ class MemoryManager { queue_impl &Queue, const void *Src, void *Dst, const ur_image_desc_t &SrcDesc, const ur_image_desc_t &DstDesc, const ur_image_format_t &SrcFormat, const ur_image_format_t &DstFormat, - const ur_exp_image_copy_flags_t Flags, ur_rect_offset_t SrcOffset, - ur_rect_offset_t DstOffset, ur_rect_region_t CopyExtent, + const ur_exp_image_copy_flags_t Flags, + const ur_exp_image_copy_input_types_t InputTypes, + ur_rect_offset_t SrcOffset, ur_rect_offset_t DstOffset, + ur_rect_region_t CopyExtent, const std::vector &DepEvents, ur_event_handle_t *OutEvent); }; diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 1e1e37e63a9d2..9b3610ff21266 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -3665,8 +3665,9 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { MemoryManager::copy_image_bindless, *MQueue, Copy->getSrc(), Copy->getDst(), Copy->getSrcDesc(), Copy->getDstDesc(), Copy->getSrcFormat(), Copy->getDstFormat(), Copy->getCopyFlags(), - Copy->getSrcOffset(), Copy->getDstOffset(), Copy->getCopyExtent(), - std::move(RawEvents), Event); + Copy->getCopyInputTypes(), Copy->getSrcOffset(), + Copy->getDstOffset(), Copy->getCopyExtent(), std::move(RawEvents), + Event); Result != UR_RESULT_SUCCESS) return Result; diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index d74d96f397093..a9a8fa3d40789 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -239,16 +239,16 @@ fill_image_desc(const ext::oneapi::experimental::image_descriptor &ImgDesc) { return UrDesc; } -static void -fill_copy_args(detail::handler_impl *impl, - const ext::oneapi::experimental::image_descriptor &SrcImgDesc, - const ext::oneapi::experimental::image_descriptor &DestImgDesc, - ur_exp_image_copy_flags_t ImageCopyFlags, size_t SrcPitch, - size_t DestPitch, sycl::range<3> SrcOffset = {0, 0, 0}, - sycl::range<3> SrcExtent = {0, 0, 0}, - sycl::range<3> DestOffset = {0, 0, 0}, - sycl::range<3> DestExtent = {0, 0, 0}, - sycl::range<3> CopyExtent = {0, 0, 0}) { +static void fill_copy_args( + detail::handler_impl *impl, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + ur_exp_image_copy_flags_t ImageCopyFlags, + ur_exp_image_copy_input_types_t ImageCopyInputTypes, size_t SrcPitch, + size_t DestPitch, sycl::range<3> SrcOffset = {0, 0, 0}, + sycl::range<3> SrcExtent = {0, 0, 0}, sycl::range<3> DestOffset = {0, 0, 0}, + sycl::range<3> DestExtent = {0, 0, 0}, + sycl::range<3> CopyExtent = {0, 0, 0}) { SrcImgDesc.verify(); DestImgDesc.verify(); @@ -267,12 +267,13 @@ fill_copy_args(detail::handler_impl *impl, auto ZCopyExtentComponent = detail::fill_image_type(SrcImgDesc, UrSrcDesc); detail::fill_image_type(DestImgDesc, UrDestDesc); - // Copy args computed here are directly passed to UR. Various offsets and - // extents end up passed as ur_rect_offset_t and ur_rect_region_t. Both those - // structs expect their first component to be in bytes, not in pixels - size_t SrcPixelSize = SrcImgDesc.num_channels * get_channel_size(SrcImgDesc); - size_t DestPixelSize = - DestImgDesc.num_channels * get_channel_size(DestImgDesc); + // ur_rect_offset_t and ur_rect_offset_t which represent image offsets and + // copy extents expect that X-axis offset and region width are specified in + // bytes rather then in elements. + auto SrcPixelSize = + SrcImgDesc.num_channels * detail::get_channel_size(SrcImgDesc); + auto DestPixelSize = + DestImgDesc.num_channels * detail::get_channel_size(DestImgDesc); impl->MSrcOffset = {SrcOffset[0] * SrcPixelSize, SrcOffset[1], SrcOffset[2]}; impl->MDestOffset = {DestOffset[0] * DestPixelSize, DestOffset[1], @@ -282,6 +283,7 @@ fill_copy_args(detail::handler_impl *impl, impl->MSrcImageFormat = UrSrcFormat; impl->MDstImageFormat = UrDestFormat; impl->MImageCopyFlags = ImageCopyFlags; + impl->MImageCopyInputTypes = ImageCopyInputTypes; if (CopyExtent.size() != 0) { impl->MCopyExtent = {CopyExtent[0] * SrcPixelSize, CopyExtent[1], @@ -311,6 +313,7 @@ static void fill_copy_args(detail::handler_impl *impl, const ext::oneapi::experimental::image_descriptor &Desc, ur_exp_image_copy_flags_t ImageCopyFlags, + ur_exp_image_copy_input_types_t ImageCopyInputTypes, sycl::range<3> SrcOffset = {0, 0, 0}, sycl::range<3> SrcExtent = {0, 0, 0}, sycl::range<3> DestOffset = {0, 0, 0}, @@ -320,22 +323,24 @@ fill_copy_args(detail::handler_impl *impl, size_t SrcPitch = SrcExtent[0] * Desc.num_channels * get_channel_size(Desc); size_t DestPitch = DestExtent[0] * Desc.num_channels * get_channel_size(Desc); - fill_copy_args(impl, Desc, Desc, ImageCopyFlags, SrcPitch, DestPitch, - SrcOffset, SrcExtent, DestOffset, DestExtent, CopyExtent); + fill_copy_args(impl, Desc, Desc, ImageCopyFlags, ImageCopyInputTypes, + SrcPitch, DestPitch, SrcOffset, SrcExtent, DestOffset, + DestExtent, CopyExtent); } -static void -fill_copy_args(detail::handler_impl *impl, - const ext::oneapi::experimental::image_descriptor &Desc, - ur_exp_image_copy_flags_t ImageCopyFlags, size_t SrcPitch, - size_t DestPitch, sycl::range<3> SrcOffset = {0, 0, 0}, - sycl::range<3> SrcExtent = {0, 0, 0}, - sycl::range<3> DestOffset = {0, 0, 0}, - sycl::range<3> DestExtent = {0, 0, 0}, - sycl::range<3> CopyExtent = {0, 0, 0}) { +static void fill_copy_args( + detail::handler_impl *impl, + const ext::oneapi::experimental::image_descriptor &Desc, + ur_exp_image_copy_flags_t ImageCopyFlags, + ur_exp_image_copy_input_types_t ImageCopyInputTypes, size_t SrcPitch, + size_t DestPitch, sycl::range<3> SrcOffset = {0, 0, 0}, + sycl::range<3> SrcExtent = {0, 0, 0}, sycl::range<3> DestOffset = {0, 0, 0}, + sycl::range<3> DestExtent = {0, 0, 0}, + sycl::range<3> CopyExtent = {0, 0, 0}) { - fill_copy_args(impl, Desc, Desc, ImageCopyFlags, SrcPitch, DestPitch, - SrcOffset, SrcExtent, DestOffset, DestExtent, CopyExtent); + fill_copy_args(impl, Desc, Desc, ImageCopyFlags, ImageCopyInputTypes, + SrcPitch, DestPitch, SrcOffset, SrcExtent, DestOffset, + DestExtent, CopyExtent); } static void @@ -343,6 +348,7 @@ fill_copy_args(detail::handler_impl *impl, const ext::oneapi::experimental::image_descriptor &SrcImgDesc, const ext::oneapi::experimental::image_descriptor &DestImgDesc, ur_exp_image_copy_flags_t ImageCopyFlags, + ur_exp_image_copy_input_types_t ImageCopyInputTypes, sycl::range<3> SrcOffset = {0, 0, 0}, sycl::range<3> SrcExtent = {0, 0, 0}, sycl::range<3> DestOffset = {0, 0, 0}, @@ -354,9 +360,9 @@ fill_copy_args(detail::handler_impl *impl, size_t DestPitch = DestExtent[0] * DestImgDesc.num_channels * get_channel_size(DestImgDesc); - fill_copy_args(impl, SrcImgDesc, DestImgDesc, ImageCopyFlags, SrcPitch, - DestPitch, SrcOffset, SrcExtent, DestOffset, DestExtent, - CopyExtent); + fill_copy_args(impl, SrcImgDesc, DestImgDesc, ImageCopyFlags, + ImageCopyInputTypes, SrcPitch, DestPitch, SrcOffset, SrcExtent, + DestOffset, DestExtent, CopyExtent); } } // namespace detail @@ -905,8 +911,8 @@ event handler::finalize() { CommandGroup.reset(new detail::CGCopyImage( MSrcPtr, MDstPtr, impl->MSrcImageDesc, impl->MDstImageDesc, impl->MSrcImageFormat, impl->MDstImageFormat, impl->MImageCopyFlags, - impl->MSrcOffset, impl->MDestOffset, impl->MCopyExtent, - std::move(impl->CGData), MCodeLoc)); + impl->MImageCopyInputTypes, impl->MSrcOffset, impl->MDestOffset, + impl->MCopyExtent, std::move(impl->CGData), MCodeLoc)); break; case detail::CGType::SemaphoreWait: CommandGroup.reset( @@ -1622,7 +1628,8 @@ void handler::ext_oneapi_copy( MDstPtr = reinterpret_cast(Dest.raw_handle); detail::fill_copy_args(get_impl(), DestImgDesc, - UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE); + UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE, + UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_IMAGE); setType(detail::CGType::CopyImage); } @@ -1640,7 +1647,8 @@ void handler::ext_oneapi_copy( MDstPtr = reinterpret_cast(Dest.raw_handle); detail::fill_copy_args(get_impl(), DestImgDesc, - UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE, SrcOffset, + UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE, + UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_IMAGE, SrcOffset, SrcExtent, DestOffset, {0, 0, 0}, CopyExtent); setType(detail::CGType::CopyImage); @@ -1657,7 +1665,8 @@ void handler::ext_oneapi_copy( MDstPtr = Dest; detail::fill_copy_args(get_impl(), SrcImgDesc, - UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST); + UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST, + UR_EXP_IMAGE_COPY_INPUT_TYPES_IMAGE_TO_MEM); setType(detail::CGType::CopyImage); } @@ -1676,7 +1685,8 @@ void handler::ext_oneapi_copy( MDstPtr = Dest; detail::fill_copy_args(get_impl(), SrcImgDesc, - UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST, SrcOffset, + UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST, + UR_EXP_IMAGE_COPY_INPUT_TYPES_IMAGE_TO_MEM, SrcOffset, {0, 0, 0}, DestOffset, DestExtent, CopyExtent); setType(detail::CGType::CopyImage); @@ -1705,11 +1715,13 @@ void handler::ext_oneapi_copy( Desc.width * Desc.num_channels * detail::get_channel_size(Desc); if (ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE) { - detail::fill_copy_args(get_impl(), Desc, ImageCopyFlags, HostRowPitch, - DeviceRowPitch); + detail::fill_copy_args(get_impl(), Desc, ImageCopyFlags, + UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_MEM, + HostRowPitch, DeviceRowPitch); } else if (ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST) { - detail::fill_copy_args(get_impl(), Desc, ImageCopyFlags, DeviceRowPitch, - HostRowPitch); + detail::fill_copy_args(get_impl(), Desc, ImageCopyFlags, + UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_MEM, + DeviceRowPitch, HostRowPitch); } else { throw sycl::exception(make_error_code(errc::invalid), "Copy Error: This copy function only performs host " @@ -1746,10 +1758,12 @@ void handler::ext_oneapi_copy( // Fill the host extent based on the type of copy. if (ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE) { detail::fill_copy_args(get_impl(), DeviceImgDesc, ImageCopyFlags, + UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_MEM, HostRowPitch, DeviceRowPitch, SrcOffset, HostExtent, DestOffset, {0, 0, 0}, CopyExtent); } else if (ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST) { detail::fill_copy_args(get_impl(), DeviceImgDesc, ImageCopyFlags, + UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_MEM, DeviceRowPitch, HostRowPitch, SrcOffset, {0, 0, 0}, DestOffset, HostExtent, CopyExtent); } else { @@ -1774,7 +1788,8 @@ void handler::ext_oneapi_copy( MDstPtr = reinterpret_cast(Dest.raw_handle); detail::fill_copy_args(get_impl(), SrcImgDesc, DestImgDesc, - UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE); + UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE, + UR_EXP_IMAGE_COPY_INPUT_TYPES_IMAGE_TO_IMAGE); setType(detail::CGType::CopyImage); } @@ -1794,8 +1809,10 @@ void handler::ext_oneapi_copy( MDstPtr = reinterpret_cast(Dest.raw_handle); detail::fill_copy_args(get_impl(), SrcImgDesc, DestImgDesc, - UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE, SrcOffset, - {0, 0, 0}, DestOffset, {0, 0, 0}, CopyExtent); + UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE, + UR_EXP_IMAGE_COPY_INPUT_TYPES_IMAGE_TO_IMAGE, + SrcOffset, {0, 0, 0}, DestOffset, {0, 0, 0}, + CopyExtent); setType(detail::CGType::CopyImage); } @@ -1813,7 +1830,8 @@ void handler::ext_oneapi_copy( MDstPtr = Dest; detail::fill_copy_args(get_impl(), SrcImgDesc, DestImgDesc, - UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE, 0, + UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE, + UR_EXP_IMAGE_COPY_INPUT_TYPES_IMAGE_TO_MEM, 0, DestRowPitch); setType(detail::CGType::CopyImage); @@ -1834,7 +1852,8 @@ void handler::ext_oneapi_copy( MDstPtr = Dest; detail::fill_copy_args(get_impl(), SrcImgDesc, DestImgDesc, - UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE, 0, + UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE, + UR_EXP_IMAGE_COPY_INPUT_TYPES_IMAGE_TO_MEM, 0, DestRowPitch, SrcOffset, {0, 0, 0}, DestOffset, {0, 0, 0}, CopyExtent); @@ -1854,8 +1873,9 @@ void handler::ext_oneapi_copy( MDstPtr = reinterpret_cast(Dest.raw_handle); detail::fill_copy_args(get_impl(), SrcImgDesc, DestImgDesc, - UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE, SrcRowPitch, - 0); + UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE, + UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_IMAGE, + SrcRowPitch, 0); setType(detail::CGType::CopyImage); } @@ -1875,9 +1895,10 @@ void handler::ext_oneapi_copy( MDstPtr = reinterpret_cast(Dest.raw_handle); detail::fill_copy_args(get_impl(), SrcImgDesc, DestImgDesc, - UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE, SrcRowPitch, - 0, SrcOffset, {0, 0, 0}, DestOffset, {0, 0, 0}, - CopyExtent); + UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE, + UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_IMAGE, + SrcRowPitch, 0, SrcOffset, {0, 0, 0}, DestOffset, + {0, 0, 0}, CopyExtent); setType(detail::CGType::CopyImage); } @@ -1904,6 +1925,7 @@ void handler::ext_oneapi_copy( if (ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE || ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_HOST) { detail::fill_copy_args(get_impl(), SrcImgDesc, DestImgDesc, ImageCopyFlags, + UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_MEM, SrcRowPitch, DestRowPitch); } else { throw sycl::exception(make_error_code(errc::invalid), @@ -1933,6 +1955,7 @@ void handler::ext_oneapi_copy( if (ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE || ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_HOST) { detail::fill_copy_args(get_impl(), SrcImgDesc, DestImgDesc, ImageCopyFlags, + UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_MEM, SrcRowPitch, DestRowPitch, SrcOffset, {0, 0, 0}, DestOffset, {0, 0, 0}, CopyExtent); } else { diff --git a/sycl/test-e2e/bindless_images/copies/copy_subregion_1D.cpp b/sycl/test-e2e/bindless_images/copies/copy_subregion_1D.cpp index 7bad414cd0881..e07e15be2d239 100644 --- a/sycl/test-e2e/bindless_images/copies/copy_subregion_1D.cpp +++ b/sycl/test-e2e/bindless_images/copies/copy_subregion_1D.cpp @@ -1,5 +1,4 @@ // REQUIRES: aspect-ext_oneapi_bindless_images -// REQUIRES: cuda // XFAIL: hip // XFAIL-TRACKER: https://github.com/intel/llvm/issues/19957 diff --git a/sycl/test-e2e/bindless_images/copies/copy_subregion_2D.cpp b/sycl/test-e2e/bindless_images/copies/copy_subregion_2D.cpp index f0be2d821014e..d8e5e51dfe00e 100644 --- a/sycl/test-e2e/bindless_images/copies/copy_subregion_2D.cpp +++ b/sycl/test-e2e/bindless_images/copies/copy_subregion_2D.cpp @@ -1,5 +1,6 @@ // REQUIRES: aspect-ext_oneapi_bindless_images -// REQUIRES: cuda +// XFAIL: linux && arch-intel_gpu_acm_g10 +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/20004 // XFAIL: hip // XFAIL-TRACKER: https://github.com/intel/llvm/issues/19957 diff --git a/sycl/test-e2e/bindless_images/copies/device_to_device_copy.cpp b/sycl/test-e2e/bindless_images/copies/device_to_device_copy.cpp index dfc3bab32ceb4..16c29717850b4 100644 --- a/sycl/test-e2e/bindless_images/copies/device_to_device_copy.cpp +++ b/sycl/test-e2e/bindless_images/copies/device_to_device_copy.cpp @@ -1,5 +1,4 @@ // REQUIRES: aspect-ext_oneapi_bindless_images -// REQUIRES: cuda // XFAIL: hip // XFAIL-TRACKER: https://github.com/intel/llvm/issues/19957 diff --git a/sycl/test-e2e/bindless_images/copies/device_to_device_pitched.cpp b/sycl/test-e2e/bindless_images/copies/device_to_device_pitched.cpp index 64e5b8d6c30d6..2568655512cdd 100644 --- a/sycl/test-e2e/bindless_images/copies/device_to_device_pitched.cpp +++ b/sycl/test-e2e/bindless_images/copies/device_to_device_pitched.cpp @@ -1,6 +1,5 @@ // REQUIRES: aspect-ext_oneapi_bindless_images // REQUIRES: aspect-ext_oneapi_bindless_images_2d_usm -// REQUIRES: cuda // XFAIL: hip // XFAIL-TRACKER: https://github.com/intel/llvm/issues/19957 // diff --git a/unified-runtime/include/ur_api.h b/unified-runtime/include/ur_api.h index f78714b4e06aa..2d9ce0ec8b4a2 100644 --- a/unified-runtime/include/ur_api.h +++ b/unified-runtime/include/ur_api.h @@ -9800,6 +9800,23 @@ typedef enum ur_exp_image_copy_flag_t { /// @brief Bit Mask for validating ur_exp_image_copy_flags_t #define UR_EXP_IMAGE_COPY_FLAGS_MASK 0xfffffff0 +/////////////////////////////////////////////////////////////////////////////// +/// @brief Dictates the types of memory copy input and output. +typedef enum ur_exp_image_copy_input_types_t { + /// Memory to image handle + UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_IMAGE = 0, + /// Image handle to memory + UR_EXP_IMAGE_COPY_INPUT_TYPES_IMAGE_TO_MEM = 1, + /// Memory to Memory + UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_MEM = 2, + /// Image handle to image handle + UR_EXP_IMAGE_COPY_INPUT_TYPES_IMAGE_TO_IMAGE = 3, + /// @cond + UR_EXP_IMAGE_COPY_INPUT_TYPES_FORCE_UINT32 = 0x7fffffff + /// @endcond + +} ur_exp_image_copy_input_types_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Sampler cubemap seamless filtering mode. typedef enum ur_exp_sampler_cubemap_filter_mode_t { @@ -10287,6 +10304,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp( /// + `NULL == pCopyRegion` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION /// + `::UR_EXP_IMAGE_COPY_FLAGS_MASK & imageCopyFlags` +/// + `::UR_EXP_IMAGE_COPY_INPUT_TYPES_IMAGE_TO_IMAGE < +/// imageCopyInputTypes` /// - ::UR_RESULT_ERROR_INVALID_QUEUE /// - ::UR_RESULT_ERROR_INVALID_VALUE /// - ::UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR @@ -10316,6 +10335,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( ur_exp_image_copy_region_t *pCopyRegion, /// [in] flags describing copy direction e.g. H2D or D2H ur_exp_image_copy_flags_t imageCopyFlags, + /// [in] flag describing types of source and destination pointers (USM vs + /// image handle) + ur_exp_image_copy_input_types_t imageCopyInputTypes, /// [in] size of the event wait list uint32_t numEventsInWaitList, /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of @@ -14873,6 +14895,7 @@ typedef struct ur_bindless_images_image_copy_exp_params_t { const ur_image_format_t **ppDstImageFormat; ur_exp_image_copy_region_t **ppCopyRegion; ur_exp_image_copy_flags_t *pimageCopyFlags; + ur_exp_image_copy_input_types_t *pimageCopyInputTypes; uint32_t *pnumEventsInWaitList; const ur_event_handle_t **pphEventWaitList; ur_event_handle_t **pphEvent; diff --git a/unified-runtime/include/ur_ddi.h b/unified-runtime/include/ur_ddi.h index 8ab686aa583cc..f59e15a9eb3cd 100644 --- a/unified-runtime/include/ur_ddi.h +++ b/unified-runtime/include/ur_ddi.h @@ -1412,8 +1412,8 @@ typedef ur_result_t(UR_APICALL *ur_pfnBindlessImagesImageCopyExp_t)( ur_queue_handle_t, const void *, void *, const ur_image_desc_t *, const ur_image_desc_t *, const ur_image_format_t *, const ur_image_format_t *, ur_exp_image_copy_region_t *, - ur_exp_image_copy_flags_t, uint32_t, const ur_event_handle_t *, - ur_event_handle_t *); + ur_exp_image_copy_flags_t, ur_exp_image_copy_input_types_t, uint32_t, + const ur_event_handle_t *, ur_event_handle_t *); /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urBindlessImagesImageGetInfoExp diff --git a/unified-runtime/include/ur_print.h b/unified-runtime/include/ur_print.h index 8130df0c5bec4..7c4528ec9ea81 100644 --- a/unified-runtime/include/ur_print.h +++ b/unified-runtime/include/ur_print.h @@ -1203,6 +1203,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintExpImageCopyFlags(enum ur_exp_image_copy_flag_t value, char *buffer, const size_t buff_size, size_t *out_size); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_image_copy_input_types_t enum +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintExpImageCopyInputTypes( + enum ur_exp_image_copy_input_types_t value, char *buffer, + const size_t buff_size, size_t *out_size); + /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_exp_sampler_cubemap_filter_mode_t enum /// @returns diff --git a/unified-runtime/include/ur_print.hpp b/unified-runtime/include/ur_print.hpp index 15c50dd0eb479..844a8b16949f6 100644 --- a/unified-runtime/include/ur_print.hpp +++ b/unified-runtime/include/ur_print.hpp @@ -534,6 +534,8 @@ operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_pool_buffer_desc_t params); inline std::ostream &operator<<(std::ostream &os, enum ur_exp_image_copy_flag_t value); +inline std::ostream &operator<<(std::ostream &os, + enum ur_exp_image_copy_input_types_t value); inline std::ostream & operator<<(std::ostream &os, enum ur_exp_sampler_cubemap_filter_mode_t value); inline std::ostream &operator<<(std::ostream &os, @@ -11359,6 +11361,31 @@ inline ur_result_t printFlag(std::ostream &os, } } // namespace ur::details /////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_image_copy_input_types_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, + enum ur_exp_image_copy_input_types_t value) { + switch (value) { + case UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_IMAGE: + os << "UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_IMAGE"; + break; + case UR_EXP_IMAGE_COPY_INPUT_TYPES_IMAGE_TO_MEM: + os << "UR_EXP_IMAGE_COPY_INPUT_TYPES_IMAGE_TO_MEM"; + break; + case UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_MEM: + os << "UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_MEM"; + break; + case UR_EXP_IMAGE_COPY_INPUT_TYPES_IMAGE_TO_IMAGE: + os << "UR_EXP_IMAGE_COPY_INPUT_TYPES_IMAGE_TO_IMAGE"; + break; + default: + os << "unknown enumerator"; + break; + } + return os; +} +/////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_exp_sampler_cubemap_filter_mode_t type /// @returns /// std::ostream & @@ -18269,6 +18296,11 @@ inline std::ostream &operator<<( ur::details::printFlag(os, *(params->pimageCopyFlags)); + os << ", "; + os << ".imageCopyInputTypes = "; + + os << *(params->pimageCopyInputTypes); + os << ", "; os << ".numEventsInWaitList = "; diff --git a/unified-runtime/scripts/core/exp-bindless-images.yml b/unified-runtime/scripts/core/exp-bindless-images.yml index 6ace4e7740153..05c6acec0191f 100644 --- a/unified-runtime/scripts/core/exp-bindless-images.yml +++ b/unified-runtime/scripts/core/exp-bindless-images.yml @@ -174,6 +174,20 @@ etors: desc: "Host to host" --- #-------------------------------------------------------------------------- type: enum +desc: "Dictates the types of memory copy input and output." +class: $xBindlessImages +name: $x_exp_image_copy_input_types_t +etors: + - name: MEM_TO_IMAGE + desc: "Memory to image handle" + - name: IMAGE_TO_MEM + desc: "Image handle to memory" + - name: MEM_TO_MEM + desc: "Memory to Memory" + - name: IMAGE_TO_IMAGE + desc: "Image handle to image handle" +--- #-------------------------------------------------------------------------- +type: enum extend: True desc: "Memory types" name: $x_mem_type_t @@ -602,6 +616,9 @@ params: - type: $x_exp_image_copy_flags_t name: imageCopyFlags desc: "[in] flags describing copy direction e.g. H2D or D2H" + - type: $x_exp_image_copy_input_types_t + name: imageCopyInputTypes + desc: "[in] flag describing types of source and destination pointers (USM vs image handle)" - type: uint32_t name: numEventsInWaitList desc: "[in] size of the event wait list" diff --git a/unified-runtime/source/adapters/cuda/image.cpp b/unified-runtime/source/adapters/cuda/image.cpp index 4f1f1892cddfd..5ec1e1d9a35c3 100644 --- a/unified-runtime/source/adapters/cuda/image.cpp +++ b/unified-runtime/source/adapters/cuda/image.cpp @@ -630,8 +630,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( const ur_image_format_t *pSrcImageFormat, const ur_image_format_t *pDstImageFormat, ur_exp_image_copy_region_t *pCopyRegion, - ur_exp_image_copy_flags_t imageCopyFlags, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + ur_exp_image_copy_flags_t imageCopyFlags, ur_exp_image_copy_input_types_t, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { UR_ASSERT((imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE || imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST || imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE || diff --git a/unified-runtime/source/adapters/hip/image.cpp b/unified-runtime/source/adapters/hip/image.cpp index 05f04f1ff9eea..f44e025242d6a 100644 --- a/unified-runtime/source/adapters/hip/image.cpp +++ b/unified-runtime/source/adapters/hip/image.cpp @@ -625,8 +625,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( const ur_image_format_t *pSrcImageFormat, const ur_image_format_t *pDstImageFormat, ur_exp_image_copy_region_t *pCopyRegion, - ur_exp_image_copy_flags_t imageCopyFlags, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + ur_exp_image_copy_flags_t imageCopyFlags, ur_exp_image_copy_input_types_t, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { UR_ASSERT((imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE || imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST || imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE), diff --git a/unified-runtime/source/adapters/level_zero/image.cpp b/unified-runtime/source/adapters/level_zero/image.cpp index ec7ddc8b2292c..5fa61e13a9e97 100644 --- a/unified-runtime/source/adapters/level_zero/image.cpp +++ b/unified-runtime/source/adapters/level_zero/image.cpp @@ -28,8 +28,10 @@ ur_result_t urBindlessImagesImageCopyExp( const ur_image_format_t *pSrcImageFormat, const ur_image_format_t *pDstImageFormat, ur_exp_image_copy_region_t *pCopyRegion, - ur_exp_image_copy_flags_t imageCopyFlags, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + ur_exp_image_copy_flags_t imageCopyFlags, + ur_exp_image_copy_input_types_t imageCopyInputTypes, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { std::scoped_lock Lock(hQueue->Mutex); UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); @@ -43,7 +45,17 @@ ur_result_t urBindlessImagesImageCopyExp( UR_ASSERT(!(pSrcImageDesc && UR_MEM_TYPE_IMAGE1D_ARRAY < pSrcImageDesc->type), UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR); - bool UseCopyEngine = hQueue->useCopyEngine(/*PreferCopyEngine*/ true); + // When we do a region copy from an image handle to USM with non-zero offest + // into a USM region, then copy engine would ignore the offset and always + // write data at the beginning of the USM allocation. + // On the other hand, when performing memory to memory copies if copy engine + // is not used, then only half the lines are copied. + // This is wild and the change is only added because we continue to test + // both V1 and V2 L0 adapters for all HW, regardless of the default adapter + // there. + bool UseCopyEngine = + hQueue->useCopyEngine(/*PreferCopyEngine*/ imageCopyInputTypes == + UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_MEM); // Due to the limitation of the copy engine, disable usage of Copy Engine // Given 3 channel image if (is3ChannelOrder( @@ -84,8 +96,8 @@ ur_result_t urBindlessImagesImageCopyExp( auto res = bindlessImagesHandleCopyFlags( pSrc, pDst, pSrcImageDesc, pDstImageDesc, pSrcImageFormat, - pDstImageFormat, pCopyRegion, imageCopyFlags, ZeCommandList, ZeEvent, - WaitList.Length, WaitList.ZeEventList); + pDstImageFormat, pCopyRegion, imageCopyFlags, imageCopyInputTypes, + ZeCommandList, ZeEvent, WaitList.Length, WaitList.ZeEventList); if (res == UR_RESULT_SUCCESS) UR_CALL(hQueue->executeCommandList(CommandList, Blocking, OkToBatch)); diff --git a/unified-runtime/source/adapters/level_zero/image_common.cpp b/unified-runtime/source/adapters/level_zero/image_common.cpp index e4cbd8edbe9a8..7bb094b0ea82b 100644 --- a/unified-runtime/source/adapters/level_zero/image_common.cpp +++ b/unified-runtime/source/adapters/level_zero/image_common.cpp @@ -791,7 +791,8 @@ ur_result_t bindlessImagesHandleCopyFlags( const ur_image_format_t *pSrcImageFormat, const ur_image_format_t *pDstImageFormat, ur_exp_image_copy_region_t *pCopyRegion, - ur_exp_image_copy_flags_t imageCopyFlags, + /* unused */ ur_exp_image_copy_flags_t, + ur_exp_image_copy_input_types_t copyImageInputTypes, ze_command_list_handle_t ZeCommandList, ze_event_handle_t zeSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { @@ -800,98 +801,57 @@ ur_result_t bindlessImagesHandleCopyFlags( uint32_t SrcPixelSizeInBytes = getPixelSizeBytes(pSrcImageFormat); uint32_t DstPixelSizeInBytes = getPixelSizeBytes(pDstImageFormat); - switch (imageCopyFlags) { - case UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE: { - uint32_t SrcRowPitch = pSrcImageDesc->rowPitch; - uint32_t SrcSlicePitch = SrcRowPitch * pSrcImageDesc->height; - if (pDstImageDesc->rowPitch == 0) { - // Copy to Non-USM memory - - ze_image_region_t DstRegion; - UR_CALL(getZeImageRegionHelper(zeSrcImageDesc, SrcPixelSizeInBytes, - &pCopyRegion->dstOffset, - &pCopyRegion->copyExtent, DstRegion)); - auto *urDstImg = static_cast(pDst); - - const char *SrcPtr = static_cast(pSrc) + - pCopyRegion->srcOffset.z * SrcSlicePitch + - pCopyRegion->srcOffset.y * SrcRowPitch + - pCopyRegion->srcOffset.x; - - ZE2UR_CALL(zeCommandListAppendImageCopyFromMemoryExt, - (ZeCommandList, urDstImg->getZeImage(), SrcPtr, &DstRegion, - SrcRowPitch, SrcSlicePitch, zeSignalEvent, numWaitEvents, - phWaitEvents)); - } else { - // Copy to pitched USM memory - uint32_t DstRowPitch = pDstImageDesc->rowPitch; - ze_copy_region_t ZeDstRegion = {(uint32_t)pCopyRegion->dstOffset.x, - (uint32_t)pCopyRegion->dstOffset.y, - (uint32_t)pCopyRegion->dstOffset.z, - DstRowPitch, - (uint32_t)pCopyRegion->copyExtent.height, - (uint32_t)pCopyRegion->copyExtent.depth}; - uint32_t DstSlicePitch = 0; - ze_copy_region_t ZeSrcRegion = {(uint32_t)pCopyRegion->srcOffset.x, - (uint32_t)pCopyRegion->srcOffset.y, - (uint32_t)pCopyRegion->srcOffset.z, - SrcRowPitch, - (uint32_t)pCopyRegion->copyExtent.height, - (uint32_t)pCopyRegion->copyExtent.depth}; - ZE2UR_CALL(zeCommandListAppendMemoryCopyRegion, - (ZeCommandList, pDst, &ZeDstRegion, DstRowPitch, DstSlicePitch, - pSrc, &ZeSrcRegion, SrcRowPitch, SrcSlicePitch, zeSignalEvent, - numWaitEvents, phWaitEvents)); - } - return UR_RESULT_SUCCESS; - }; - case UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST: { - uint32_t DstRowPitch = pDstImageDesc->rowPitch; + // Level Zero does not use terms device and host, but instead operates on + // terms image and memory. + // Image means ze_image_handle_t, memory means regular pointer. + // The choice of API to call depends on input types, not on the copy + // direction. + + switch (copyImageInputTypes) { + case UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_MEM: { + // Copy between (possibly) pitched USM regions + ze_copy_region_t ZeDstRegion = {(uint32_t)pCopyRegion->dstOffset.x, + (uint32_t)pCopyRegion->dstOffset.y, + (uint32_t)pCopyRegion->dstOffset.z, + (uint32_t)pCopyRegion->copyExtent.width, + (uint32_t)pCopyRegion->copyExtent.height, + (uint32_t)pCopyRegion->copyExtent.depth}; + ze_copy_region_t ZeSrcRegion = {(uint32_t)pCopyRegion->srcOffset.x, + (uint32_t)pCopyRegion->srcOffset.y, + (uint32_t)pCopyRegion->srcOffset.z, + (uint32_t)pCopyRegion->copyExtent.width, + (uint32_t)pCopyRegion->copyExtent.height, + (uint32_t)pCopyRegion->copyExtent.depth}; + // This function could have been called to perform a copy of a 1D image and + // copy region height could be set to 0 in this case. L0 doesn't like that, + // so we adjust it so that copy region is a valid 2D region + if (ZeSrcRegion.height == 0) + ZeSrcRegion.height = 1; + if (ZeDstRegion.height == 0) + ZeDstRegion.height = 1; + // Strictly speaking, zeCommandListAppendMemoryCopyRegion is only for 2D and + // 3D copies and as such, row pitch arguments are non-optional. + // Since urBindlessImagesImageCopy can also be called for 1D images for + // which row pitch is zero, we calculate it ourselves. + uint32_t DstRowPitch = + std::max(pDstImageDesc->rowPitch, pCopyRegion->copyExtent.width); + uint32_t SrcRowPitch = + std::max(pSrcImageDesc->rowPitch, pCopyRegion->copyExtent.width); uint32_t DstSlicePitch = DstRowPitch * pDstImageDesc->height; - if (pSrcImageDesc->rowPitch == 0) { - // Copy from Non-USM memory to host - ze_image_region_t SrcRegion; - UR_CALL(getZeImageRegionHelper(zeSrcImageDesc, SrcPixelSizeInBytes, - &pCopyRegion->srcOffset, - &pCopyRegion->copyExtent, SrcRegion)); - - auto *urSrcImg = reinterpret_cast(pSrc); - - char *DstPtr = - static_cast(pDst) + pCopyRegion->dstOffset.z * DstSlicePitch + - pCopyRegion->dstOffset.y * DstRowPitch + pCopyRegion->dstOffset.x; - ZE2UR_CALL(zeCommandListAppendImageCopyToMemoryExt, - (ZeCommandList, DstPtr, urSrcImg->getZeImage(), &SrcRegion, - DstRowPitch, DstSlicePitch, zeSignalEvent, numWaitEvents, - phWaitEvents)); - } else { - // Copy from pitched USM memory to host - ze_copy_region_t ZeDstRegion = {(uint32_t)pCopyRegion->dstOffset.x, - (uint32_t)pCopyRegion->dstOffset.y, - (uint32_t)pCopyRegion->dstOffset.z, - DstRowPitch, - (uint32_t)pCopyRegion->copyExtent.height, - (uint32_t)pCopyRegion->copyExtent.depth}; - uint32_t SrcRowPitch = pSrcImageDesc->rowPitch; - ze_copy_region_t ZeSrcRegion = {(uint32_t)pCopyRegion->srcOffset.x, - (uint32_t)pCopyRegion->srcOffset.y, - (uint32_t)pCopyRegion->srcOffset.z, - SrcRowPitch, - (uint32_t)pCopyRegion->copyExtent.height, - (uint32_t)pCopyRegion->copyExtent.depth}; - uint32_t SrcSlicePitch = 0; - ZE2UR_CALL(zeCommandListAppendMemoryCopyRegion, - (ZeCommandList, pDst, &ZeDstRegion, DstRowPitch, DstSlicePitch, - pSrc, &ZeSrcRegion, SrcRowPitch, SrcSlicePitch, zeSignalEvent, - numWaitEvents, phWaitEvents)); - } + uint32_t SrcSlicePitch = SrcRowPitch * pSrcImageDesc->height; + ZE2UR_CALL(zeCommandListAppendMemoryCopyRegion, + (ZeCommandList, pDst, &ZeDstRegion, DstRowPitch, DstSlicePitch, + pSrc, &ZeSrcRegion, SrcRowPitch, SrcSlicePitch, zeSignalEvent, + numWaitEvents, phWaitEvents)); return UR_RESULT_SUCCESS; - }; - case UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE: { + } + case UR_EXP_IMAGE_COPY_INPUT_TYPES_IMAGE_TO_IMAGE: { + // Copy between two ze_image_handle_t's ze_image_region_t DstRegion; UR_CALL(getZeImageRegionHelper(zeSrcImageDesc, DstPixelSizeInBytes, &pCopyRegion->dstOffset, &pCopyRegion->copyExtent, DstRegion)); + ze_image_region_t SrcRegion; UR_CALL(getZeImageRegionHelper(zeSrcImageDesc, SrcPixelSizeInBytes, &pCopyRegion->srcOffset, @@ -906,10 +866,52 @@ ur_result_t bindlessImagesHandleCopyFlags( phWaitEvents)); return UR_RESULT_SUCCESS; - }; + } + case UR_EXP_IMAGE_COPY_INPUT_TYPES_MEM_TO_IMAGE: { + // Copy from USM to ze_image_handle_t + ze_image_region_t DstRegion; + UR_CALL(getZeImageRegionHelper(zeSrcImageDesc, DstPixelSizeInBytes, + &pCopyRegion->dstOffset, + &pCopyRegion->copyExtent, DstRegion)); + + auto *urDstImg = static_cast(pDst); + + const uint32_t SrcRowPitch = pSrcImageDesc->rowPitch; + const uint32_t SrcSlicePitch = SrcRowPitch * pSrcImageDesc->height; + const char *SrcPtr = static_cast(pSrc) + + pCopyRegion->srcOffset.z * SrcSlicePitch + + pCopyRegion->srcOffset.y * SrcRowPitch + + pCopyRegion->srcOffset.x; + + ZE2UR_CALL(zeCommandListAppendImageCopyFromMemoryExt, + (ZeCommandList, urDstImg->getZeImage(), SrcPtr, &DstRegion, + SrcRowPitch, SrcSlicePitch, zeSignalEvent, numWaitEvents, + phWaitEvents)); + return UR_RESULT_SUCCESS; + } + case UR_EXP_IMAGE_COPY_INPUT_TYPES_IMAGE_TO_MEM: { + // Copy from ze_image_handle_t to USM + ze_image_region_t SrcRegion; + UR_CALL(getZeImageRegionHelper(zeSrcImageDesc, SrcPixelSizeInBytes, + &pCopyRegion->srcOffset, + &pCopyRegion->copyExtent, SrcRegion)); + + auto *urSrcImg = reinterpret_cast(pSrc); + + const uint32_t DstRowPitch = pDstImageDesc->rowPitch; + const uint32_t DstSlicePitch = DstRowPitch * pDstImageDesc->height; + char *DstPtr = + static_cast(pDst) + pCopyRegion->dstOffset.z * DstSlicePitch + + pCopyRegion->dstOffset.y * DstRowPitch + pCopyRegion->dstOffset.x; + ZE2UR_CALL(zeCommandListAppendImageCopyToMemoryExt, + (ZeCommandList, DstPtr, urSrcImg->getZeImage(), &SrcRegion, + DstRowPitch, DstSlicePitch, zeSignalEvent, numWaitEvents, + phWaitEvents)); + return UR_RESULT_SUCCESS; + } default: UR_LOG(ERR, "ur_queue_immediate_in_order_t::bindlessImagesImageCopyExp: " - "unexpected imageCopyFlags"); + "unexpected inputs"); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } } diff --git a/unified-runtime/source/adapters/level_zero/image_common.hpp b/unified-runtime/source/adapters/level_zero/image_common.hpp index 8df10e528b06b..dd9d944ae92aa 100644 --- a/unified-runtime/source/adapters/level_zero/image_common.hpp +++ b/unified-runtime/source/adapters/level_zero/image_common.hpp @@ -60,6 +60,7 @@ ur_result_t bindlessImagesHandleCopyFlags( const ur_image_format_t *pDstImageFormat, ur_exp_image_copy_region_t *pCopyRegion, ur_exp_image_copy_flags_t imageCopyFlags, + ur_exp_image_copy_input_types_t imageCopyInputTypes, ze_command_list_handle_t ZeCommandList, ze_event_handle_t zeSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents); diff --git a/unified-runtime/source/adapters/level_zero/ur_interface_loader.hpp b/unified-runtime/source/adapters/level_zero/ur_interface_loader.hpp index df8e93c1f768a..77bc0b7d5b737 100644 --- a/unified-runtime/source/adapters/level_zero/ur_interface_loader.hpp +++ b/unified-runtime/source/adapters/level_zero/ur_interface_loader.hpp @@ -562,8 +562,10 @@ ur_result_t urBindlessImagesImageCopyExp( const ur_image_format_t *pSrcImageFormat, const ur_image_format_t *pDstImageFormat, ur_exp_image_copy_region_t *pCopyRegion, - ur_exp_image_copy_flags_t imageCopyFlags, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); + ur_exp_image_copy_flags_t imageCopyFlags, + ur_exp_image_copy_input_types_t imageCopyInputTypes, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); ur_result_t urBindlessImagesImageGetInfoExp( ur_context_handle_t hContext, ur_exp_image_mem_native_handle_t hImageMem, ur_image_info_t propName, void *pPropValue, size_t *pPropSizeRet); diff --git a/unified-runtime/source/adapters/level_zero/v2/command_list_manager.cpp b/unified-runtime/source/adapters/level_zero/v2/command_list_manager.cpp index 3561d84ae3962..abc8156f978f9 100644 --- a/unified-runtime/source/adapters/level_zero/v2/command_list_manager.cpp +++ b/unified-runtime/source/adapters/level_zero/v2/command_list_manager.cpp @@ -894,16 +894,19 @@ ur_result_t ur_command_list_manager::bindlessImagesImageCopyExp( const ur_image_format_t *pSrcImageFormat, const ur_image_format_t *pDstImageFormat, ur_exp_image_copy_region_t *pCopyRegion, - ur_exp_image_copy_flags_t imageCopyFlags, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t phEvent) { + ur_exp_image_copy_flags_t imageCopyFlags, + ur_exp_image_copy_input_types_t imageCopyInputTypes, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t phEvent) { auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_MEM_IMAGE_COPY); auto waitListView = getWaitListView(phEventWaitList, numEventsInWaitList); return bindlessImagesHandleCopyFlags( pSrc, pDst, pSrcImageDesc, pDstImageDesc, pSrcImageFormat, - pDstImageFormat, pCopyRegion, imageCopyFlags, getZeCommandList(), - zeSignalEvent, waitListView.num, waitListView.handles); + pDstImageFormat, pCopyRegion, imageCopyFlags, imageCopyInputTypes, + getZeCommandList(), zeSignalEvent, waitListView.num, + waitListView.handles); } ur_result_t ur_command_list_manager::bindlessImagesWaitExternalSemaphoreExp( diff --git a/unified-runtime/source/adapters/level_zero/v2/command_list_manager.hpp b/unified-runtime/source/adapters/level_zero/v2/command_list_manager.hpp index a7eafa8f9cecc..3c1bbd710ed47 100644 --- a/unified-runtime/source/adapters/level_zero/v2/command_list_manager.hpp +++ b/unified-runtime/source/adapters/level_zero/v2/command_list_manager.hpp @@ -182,8 +182,10 @@ struct ur_command_list_manager { const ur_image_format_t *pSrcImageFormat, const ur_image_format_t *pDstImageFormat, ur_exp_image_copy_region_t *pCopyRegion, - ur_exp_image_copy_flags_t imageCopyFlags, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t phEvent); + ur_exp_image_copy_flags_t imageCopyFlags, + ur_exp_image_copy_input_types_t imageCopyInputTypes, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t phEvent); ur_result_t bindlessImagesWaitExternalSemaphoreExp( ur_exp_external_semaphore_handle_t hSemaphore, bool hasWaitValue, uint64_t waitValue, uint32_t numEventsInWaitList, diff --git a/unified-runtime/source/adapters/level_zero/v2/queue_api.cpp b/unified-runtime/source/adapters/level_zero/v2/queue_api.cpp index d043a68dcaec7..582885ea67c46 100644 --- a/unified-runtime/source/adapters/level_zero/v2/queue_api.cpp +++ b/unified-runtime/source/adapters/level_zero/v2/queue_api.cpp @@ -394,12 +394,14 @@ ur_result_t urBindlessImagesImageCopyExp( const ur_image_format_t *pSrcImageFormat, const ur_image_format_t *pDstImageFormat, ur_exp_image_copy_region_t *pCopyRegion, - ur_exp_image_copy_flags_t imageCopyFlags, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) try { + ur_exp_image_copy_flags_t imageCopyFlags, + ur_exp_image_copy_input_types_t imageCopyInputTypes, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) try { return hQueue->get().bindlessImagesImageCopyExp( pSrc, pDst, pSrcImageDesc, pDstImageDesc, pSrcImageFormat, - pDstImageFormat, pCopyRegion, imageCopyFlags, numEventsInWaitList, - phEventWaitList, phEvent); + pDstImageFormat, pCopyRegion, imageCopyFlags, imageCopyInputTypes, + numEventsInWaitList, phEventWaitList, phEvent); } catch (...) { return exceptionToResult(std::current_exception()); } diff --git a/unified-runtime/source/adapters/level_zero/v2/queue_api.hpp b/unified-runtime/source/adapters/level_zero/v2/queue_api.hpp index b710f9d56b50d..4bd9d8fd2141e 100644 --- a/unified-runtime/source/adapters/level_zero/v2/queue_api.hpp +++ b/unified-runtime/source/adapters/level_zero/v2/queue_api.hpp @@ -147,8 +147,9 @@ struct ur_queue_t_ { virtual ur_result_t bindlessImagesImageCopyExp( const void *, void *, const ur_image_desc_t *, const ur_image_desc_t *, const ur_image_format_t *, const ur_image_format_t *, - ur_exp_image_copy_region_t *, ur_exp_image_copy_flags_t, uint32_t, - const ur_event_handle_t *, ur_event_handle_t *) = 0; + ur_exp_image_copy_region_t *, ur_exp_image_copy_flags_t, + ur_exp_image_copy_input_types_t, uint32_t, const ur_event_handle_t *, + ur_event_handle_t *) = 0; virtual ur_result_t bindlessImagesWaitExternalSemaphoreExp( ur_exp_external_semaphore_handle_t, bool, uint64_t, uint32_t, const ur_event_handle_t *, ur_event_handle_t *) = 0; diff --git a/unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.hpp b/unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.hpp index 74b37d1b40eb3..3f230861ad563 100644 --- a/unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.hpp +++ b/unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.hpp @@ -390,13 +390,14 @@ struct ur_queue_immediate_in_order_t : ur_object, ur_queue_t_ { const ur_image_format_t *pSrcImageFormat, const ur_image_format_t *pDstImageFormat, ur_exp_image_copy_region_t *pCopyRegion, - ur_exp_image_copy_flags_t imageCopyFlags, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, + ur_exp_image_copy_flags_t imageCopyFlags, + ur_exp_image_copy_input_types_t imageCopyInputTypes, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) override { return commandListManager.lock()->bindlessImagesImageCopyExp( pSrc, pDst, pSrcImageDesc, pDstImageDesc, pSrcImageFormat, - pDstImageFormat, pCopyRegion, imageCopyFlags, numEventsInWaitList, - phEventWaitList, + pDstImageFormat, pCopyRegion, imageCopyFlags, imageCopyInputTypes, + numEventsInWaitList, phEventWaitList, createEventIfRequested(eventPool.get(), phEvent, this)); } diff --git a/unified-runtime/source/adapters/level_zero/v2/queue_immediate_out_of_order.hpp b/unified-runtime/source/adapters/level_zero/v2/queue_immediate_out_of_order.hpp index 07e8743154ded..f1ad68a62a1a8 100644 --- a/unified-runtime/source/adapters/level_zero/v2/queue_immediate_out_of_order.hpp +++ b/unified-runtime/source/adapters/level_zero/v2/queue_immediate_out_of_order.hpp @@ -433,14 +433,15 @@ struct ur_queue_immediate_out_of_order_t : ur_object, ur_queue_t_ { const ur_image_format_t *pSrcImageFormat, const ur_image_format_t *pDstImageFormat, ur_exp_image_copy_region_t *pCopyRegion, - ur_exp_image_copy_flags_t imageCopyFlags, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, + ur_exp_image_copy_flags_t imageCopyFlags, + ur_exp_image_copy_input_types_t imageCopyInputTypes, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) override { auto commandListId = getNextCommandListId(); return commandListManagers.lock()[commandListId].bindlessImagesImageCopyExp( pSrc, pDst, pSrcImageDesc, pDstImageDesc, pSrcImageFormat, - pDstImageFormat, pCopyRegion, imageCopyFlags, numEventsInWaitList, - phEventWaitList, + pDstImageFormat, pCopyRegion, imageCopyFlags, imageCopyInputTypes, + numEventsInWaitList, phEventWaitList, createEventIfRequested(eventPool.get(), phEvent, this)); } diff --git a/unified-runtime/source/adapters/mock/ur_mockddi.cpp b/unified-runtime/source/adapters/mock/ur_mockddi.cpp index 7956f048db92e..c7ecf0979b8f5 100644 --- a/unified-runtime/source/adapters/mock/ur_mockddi.cpp +++ b/unified-runtime/source/adapters/mock/ur_mockddi.cpp @@ -8510,6 +8510,9 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesImageCopyExp( ur_exp_image_copy_region_t *pCopyRegion, /// [in] flags describing copy direction e.g. H2D or D2H ur_exp_image_copy_flags_t imageCopyFlags, + /// [in] flag describing types of source and destination pointers (USM vs + /// image handle) + ur_exp_image_copy_input_types_t imageCopyInputTypes, /// [in] size of the event wait list uint32_t numEventsInWaitList, /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of @@ -8533,6 +8536,7 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesImageCopyExp( &pDstImageFormat, &pCopyRegion, &imageCopyFlags, + &imageCopyInputTypes, &numEventsInWaitList, &phEventWaitList, &phEvent}; diff --git a/unified-runtime/source/adapters/native_cpu/image.cpp b/unified-runtime/source/adapters/native_cpu/image.cpp index 18d02de897df2..4a4fc96dbf414 100644 --- a/unified-runtime/source/adapters/native_cpu/image.cpp +++ b/unified-runtime/source/adapters/native_cpu/image.cpp @@ -83,6 +83,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( [[maybe_unused]] const ur_image_format_t *pDstImageFormat, [[maybe_unused]] ur_exp_image_copy_region_t *pCopyRegion, [[maybe_unused]] ur_exp_image_copy_flags_t imageCopyFlags, + ur_exp_image_copy_input_types_t, [[maybe_unused]] uint32_t numEventsInWaitList, [[maybe_unused]] const ur_event_handle_t *phEventWaitList, [[maybe_unused]] ur_event_handle_t *phEvent) { diff --git a/unified-runtime/source/adapters/opencl/image.cpp b/unified-runtime/source/adapters/opencl/image.cpp index 3ef27c7f5f7ac..573d2fa4f9219 100644 --- a/unified-runtime/source/adapters/opencl/image.cpp +++ b/unified-runtime/source/adapters/opencl/image.cpp @@ -84,6 +84,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( [[maybe_unused]] const ur_image_format_t *pDstImageFormat, [[maybe_unused]] ur_exp_image_copy_region_t *pCopyRegion, [[maybe_unused]] ur_exp_image_copy_flags_t imageCopyFlags, + ur_exp_image_copy_input_types_t, [[maybe_unused]] uint32_t numEventsInWaitList, [[maybe_unused]] const ur_event_handle_t *phEventWaitList, [[maybe_unused]] ur_event_handle_t *phEvent) { diff --git a/unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp b/unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp index d096d3895c385..0bca15321849d 100644 --- a/unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp +++ b/unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp @@ -7121,6 +7121,9 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesImageCopyExp( ur_exp_image_copy_region_t *pCopyRegion, /// [in] flags describing copy direction e.g. H2D or D2H ur_exp_image_copy_flags_t imageCopyFlags, + /// [in] flag describing types of source and destination pointers (USM vs + /// image handle) + ur_exp_image_copy_input_types_t imageCopyInputTypes, /// [in] size of the event wait list uint32_t numEventsInWaitList, /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of @@ -7148,6 +7151,7 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesImageCopyExp( &pDstImageFormat, &pCopyRegion, &imageCopyFlags, + &imageCopyInputTypes, &numEventsInWaitList, &phEventWaitList, &phEvent}; @@ -7160,8 +7164,8 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesImageCopyExp( ur_result_t result = pfnImageCopyExp( hQueue, pSrc, pDst, pSrcImageDesc, pDstImageDesc, pSrcImageFormat, - pDstImageFormat, pCopyRegion, imageCopyFlags, numEventsInWaitList, - phEventWaitList, phEvent); + pDstImageFormat, pCopyRegion, imageCopyFlags, imageCopyInputTypes, + numEventsInWaitList, phEventWaitList, phEvent); getContext()->notify_end(UR_FUNCTION_BINDLESS_IMAGES_IMAGE_COPY_EXP, "urBindlessImagesImageCopyExp", ¶ms, &result, diff --git a/unified-runtime/source/loader/layers/validation/ur_valddi.cpp b/unified-runtime/source/loader/layers/validation/ur_valddi.cpp index 32dec6f1b25df..81151d4124b10 100644 --- a/unified-runtime/source/loader/layers/validation/ur_valddi.cpp +++ b/unified-runtime/source/loader/layers/validation/ur_valddi.cpp @@ -7891,6 +7891,9 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesImageCopyExp( ur_exp_image_copy_region_t *pCopyRegion, /// [in] flags describing copy direction e.g. H2D or D2H ur_exp_image_copy_flags_t imageCopyFlags, + /// [in] flag describing types of source and destination pointers (USM vs + /// image handle) + ur_exp_image_copy_input_types_t imageCopyInputTypes, /// [in] size of the event wait list uint32_t numEventsInWaitList, /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of @@ -7938,6 +7941,9 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesImageCopyExp( if (UR_EXP_IMAGE_COPY_FLAGS_MASK & imageCopyFlags) return UR_RESULT_ERROR_INVALID_ENUMERATION; + if (UR_EXP_IMAGE_COPY_INPUT_TYPES_IMAGE_TO_IMAGE < imageCopyInputTypes) + return UR_RESULT_ERROR_INVALID_ENUMERATION; + if (pSrcImageDesc && UR_MEM_TYPE_IMAGE_CUBEMAP_EXP < pSrcImageDesc->type) return UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR; @@ -7960,8 +7966,8 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesImageCopyExp( ur_result_t result = pfnImageCopyExp( hQueue, pSrc, pDst, pSrcImageDesc, pDstImageDesc, pSrcImageFormat, - pDstImageFormat, pCopyRegion, imageCopyFlags, numEventsInWaitList, - phEventWaitList, phEvent); + pDstImageFormat, pCopyRegion, imageCopyFlags, imageCopyInputTypes, + numEventsInWaitList, phEventWaitList, phEvent); return result; } diff --git a/unified-runtime/source/loader/loader.def.in b/unified-runtime/source/loader/loader.def.in index 3ad47149315ee..e86a6c65a7957 100644 --- a/unified-runtime/source/loader/loader.def.in +++ b/unified-runtime/source/loader/loader.def.in @@ -346,6 +346,7 @@ EXPORTS urPrintExpExternalSemaphoreType urPrintExpFileDescriptor urPrintExpImageCopyFlags + urPrintExpImageCopyInputTypes urPrintExpImageCopyRegion urPrintExpImageMemType urPrintExpPeerInfo diff --git a/unified-runtime/source/loader/loader.map.in b/unified-runtime/source/loader/loader.map.in index fde803f9aa45a..6a30c9186f674 100644 --- a/unified-runtime/source/loader/loader.map.in +++ b/unified-runtime/source/loader/loader.map.in @@ -346,6 +346,7 @@ urPrintExpExternalSemaphoreType; urPrintExpFileDescriptor; urPrintExpImageCopyFlags; + urPrintExpImageCopyInputTypes; urPrintExpImageCopyRegion; urPrintExpImageMemType; urPrintExpPeerInfo; diff --git a/unified-runtime/source/loader/ur_ldrddi.cpp b/unified-runtime/source/loader/ur_ldrddi.cpp index 75ae04bc5a4a8..d943fe99afdb6 100644 --- a/unified-runtime/source/loader/ur_ldrddi.cpp +++ b/unified-runtime/source/loader/ur_ldrddi.cpp @@ -4079,6 +4079,9 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesImageCopyExp( ur_exp_image_copy_region_t *pCopyRegion, /// [in] flags describing copy direction e.g. H2D or D2H ur_exp_image_copy_flags_t imageCopyFlags, + /// [in] flag describing types of source and destination pointers (USM vs + /// image handle) + ur_exp_image_copy_input_types_t imageCopyInputTypes, /// [in] size of the event wait list uint32_t numEventsInWaitList, /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of @@ -4101,8 +4104,8 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesImageCopyExp( // forward to device-platform return pfnImageCopyExp(hQueue, pSrc, pDst, pSrcImageDesc, pDstImageDesc, pSrcImageFormat, pDstImageFormat, pCopyRegion, - imageCopyFlags, numEventsInWaitList, phEventWaitList, - phEvent); + imageCopyFlags, imageCopyInputTypes, + numEventsInWaitList, phEventWaitList, phEvent); } /////////////////////////////////////////////////////////////////////////////// diff --git a/unified-runtime/source/loader/ur_libapi.cpp b/unified-runtime/source/loader/ur_libapi.cpp index 4ec2282647e80..a12bf50281efa 100644 --- a/unified-runtime/source/loader/ur_libapi.cpp +++ b/unified-runtime/source/loader/ur_libapi.cpp @@ -7716,6 +7716,8 @@ ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp( /// + `NULL == pCopyRegion` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION /// + `::UR_EXP_IMAGE_COPY_FLAGS_MASK & imageCopyFlags` +/// + `::UR_EXP_IMAGE_COPY_INPUT_TYPES_IMAGE_TO_IMAGE < +/// imageCopyInputTypes` /// - ::UR_RESULT_ERROR_INVALID_QUEUE /// - ::UR_RESULT_ERROR_INVALID_VALUE /// - ::UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR @@ -7745,6 +7747,9 @@ ur_result_t UR_APICALL urBindlessImagesImageCopyExp( ur_exp_image_copy_region_t *pCopyRegion, /// [in] flags describing copy direction e.g. H2D or D2H ur_exp_image_copy_flags_t imageCopyFlags, + /// [in] flag describing types of source and destination pointers (USM vs + /// image handle) + ur_exp_image_copy_input_types_t imageCopyInputTypes, /// [in] size of the event wait list uint32_t numEventsInWaitList, /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of @@ -7764,8 +7769,8 @@ ur_result_t UR_APICALL urBindlessImagesImageCopyExp( return pfnImageCopyExp(hQueue, pSrc, pDst, pSrcImageDesc, pDstImageDesc, pSrcImageFormat, pDstImageFormat, pCopyRegion, - imageCopyFlags, numEventsInWaitList, phEventWaitList, - phEvent); + imageCopyFlags, imageCopyInputTypes, + numEventsInWaitList, phEventWaitList, phEvent); } catch (...) { return exceptionToResult(std::current_exception()); } diff --git a/unified-runtime/source/loader/ur_print.cpp b/unified-runtime/source/loader/ur_print.cpp index f3d5c96e376ca..06619c8f7f625 100644 --- a/unified-runtime/source/loader/ur_print.cpp +++ b/unified-runtime/source/loader/ur_print.cpp @@ -965,6 +965,15 @@ ur_result_t urPrintExpImageCopyFlags(enum ur_exp_image_copy_flag_t value, return str_copy(&ss, buffer, buff_size, out_size); } +ur_result_t +urPrintExpImageCopyInputTypes(enum ur_exp_image_copy_input_types_t value, + char *buffer, const size_t buff_size, + size_t *out_size) { + std::stringstream ss; + ss << value; + return str_copy(&ss, buffer, buff_size, out_size); +} + ur_result_t urPrintExpSamplerCubemapFilterMode( enum ur_exp_sampler_cubemap_filter_mode_t value, char *buffer, const size_t buff_size, size_t *out_size) { diff --git a/unified-runtime/source/ur_api.cpp b/unified-runtime/source/ur_api.cpp index 8e3424b693e62..eb8e6f128d2d2 100644 --- a/unified-runtime/source/ur_api.cpp +++ b/unified-runtime/source/ur_api.cpp @@ -6758,6 +6758,8 @@ ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp( /// + `NULL == pCopyRegion` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION /// + `::UR_EXP_IMAGE_COPY_FLAGS_MASK & imageCopyFlags` +/// + `::UR_EXP_IMAGE_COPY_INPUT_TYPES_IMAGE_TO_IMAGE < +/// imageCopyInputTypes` /// - ::UR_RESULT_ERROR_INVALID_QUEUE /// - ::UR_RESULT_ERROR_INVALID_VALUE /// - ::UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR @@ -6787,6 +6789,9 @@ ur_result_t UR_APICALL urBindlessImagesImageCopyExp( ur_exp_image_copy_region_t *pCopyRegion, /// [in] flags describing copy direction e.g. H2D or D2H ur_exp_image_copy_flags_t imageCopyFlags, + /// [in] flag describing types of source and destination pointers (USM vs + /// image handle) + ur_exp_image_copy_input_types_t imageCopyInputTypes, /// [in] size of the event wait list uint32_t numEventsInWaitList, /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of