Skip to content

Commit 3d6a85d

Browse files
[SYCL] Use correct units when specifying image offset and extent (#19936)
`urBindlessImagesImageCopyExp` documents its argument that describe copy region as using `bytes-pixel-pixel` format, while we were mistakenly passing `pixel-pixel-pixel` format in there. This PR adjusts that. It is an `NFC` change for CUDA & HIP adapters, because for them it only changes the place where pixel to byte conversion happens, but the change is tricker for L0 adapter. In L0, there are APIs which expect `pixel-pixel-pixel` format and there are APIs which accept `bytes-pixel-pixel` format, so some extra care is needed there. --------- Co-authored-by: Steffen Larsen <[email protected]>
1 parent 6ec3aed commit 3d6a85d

File tree

4 files changed

+147
-123
lines changed

4 files changed

+147
-123
lines changed

sycl/source/handler.cpp

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -267,18 +267,27 @@ fill_copy_args(detail::handler_impl *impl,
267267
auto ZCopyExtentComponent = detail::fill_image_type(SrcImgDesc, UrSrcDesc);
268268
detail::fill_image_type(DestImgDesc, UrDestDesc);
269269

270-
impl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]};
271-
impl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]};
270+
// Copy args computed here are directly passed to UR. Various offsets and
271+
// extents end up passed as ur_rect_offset_t and ur_rect_region_t. Both those
272+
// structs expect their first component to be in bytes, not in pixels
273+
size_t SrcPixelSize = SrcImgDesc.num_channels * get_channel_size(SrcImgDesc);
274+
size_t DestPixelSize =
275+
DestImgDesc.num_channels * get_channel_size(DestImgDesc);
276+
277+
impl->MSrcOffset = {SrcOffset[0] * SrcPixelSize, SrcOffset[1], SrcOffset[2]};
278+
impl->MDestOffset = {DestOffset[0] * DestPixelSize, DestOffset[1],
279+
DestOffset[2]};
272280
impl->MSrcImageDesc = UrSrcDesc;
273281
impl->MDstImageDesc = UrDestDesc;
274282
impl->MSrcImageFormat = UrSrcFormat;
275283
impl->MDstImageFormat = UrDestFormat;
276284
impl->MImageCopyFlags = ImageCopyFlags;
277285

278286
if (CopyExtent.size() != 0) {
279-
impl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]};
287+
impl->MCopyExtent = {CopyExtent[0] * SrcPixelSize, CopyExtent[1],
288+
CopyExtent[2]};
280289
} else {
281-
impl->MCopyExtent = {SrcImgDesc.width, SrcImgDesc.height,
290+
impl->MCopyExtent = {SrcImgDesc.width * SrcPixelSize, SrcImgDesc.height,
282291
ZCopyExtentComponent};
283292
}
284293

unified-runtime/source/adapters/cuda/image.cpp

Lines changed: 47 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -645,7 +645,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
645645
};
646646

647647
unsigned int NumChannels = 0;
648-
size_t PixelSizeBytes = 0;
648+
[[maybe_unused]] size_t PixelSizeBytes = 0;
649649

650650
UR_CALL(urCalculateNumChannels(pSrcImageFormat->channelOrder, &NumChannels));
651651

@@ -673,19 +673,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
673673
cuPointerGetAttribute(&memType, CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
674674
(CUdeviceptr)pDst) != CUDA_SUCCESS;
675675

676-
size_t CopyExtentBytes = PixelSizeBytes * pCopyRegion->copyExtent.width;
677-
const char *SrcWithOffset = static_cast<const char *>(pSrc) +
678-
(pCopyRegion->srcOffset.x * PixelSizeBytes);
676+
size_t CopyExtentBytes = pCopyRegion->copyExtent.width;
677+
const char *SrcWithOffset =
678+
static_cast<const char *>(pSrc) + pCopyRegion->srcOffset.x;
679679

680680
if (isCudaArray) {
681-
UR_CHECK_ERROR(cuMemcpyHtoAAsync(
682-
(CUarray)pDst, pCopyRegion->dstOffset.x * PixelSizeBytes,
683-
static_cast<const void *>(SrcWithOffset), CopyExtentBytes,
684-
Stream));
681+
UR_CHECK_ERROR(
682+
cuMemcpyHtoAAsync((CUarray)pDst, pCopyRegion->dstOffset.x,
683+
static_cast<const void *>(SrcWithOffset),
684+
CopyExtentBytes, Stream));
685685
} else if (memType == CU_MEMORYTYPE_DEVICE) {
686-
void *DstWithOffset =
687-
static_cast<void *>(static_cast<char *>(pDst) +
688-
(PixelSizeBytes * pCopyRegion->dstOffset.x));
686+
void *DstWithOffset = static_cast<void *>(static_cast<char *>(pDst) +
687+
pCopyRegion->dstOffset.x);
689688
UR_CHECK_ERROR(
690689
cuMemcpyHtoDAsync((CUdeviceptr)DstWithOffset,
691690
static_cast<const void *>(SrcWithOffset),
@@ -698,11 +697,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
698697
CUDA_MEMCPY2D cpy_desc = {};
699698
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST;
700699
cpy_desc.srcHost = pSrc;
701-
cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x * PixelSizeBytes;
700+
cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x;
702701
cpy_desc.srcY = pCopyRegion->srcOffset.y;
703-
cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes;
702+
cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x;
704703
cpy_desc.dstY = pCopyRegion->dstOffset.y;
705-
cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width;
704+
cpy_desc.WidthInBytes = pCopyRegion->copyExtent.width;
706705
cpy_desc.Height = pCopyRegion->copyExtent.height;
707706
cpy_desc.srcPitch = pSrcImageDesc->rowPitch;
708707
if (pDstImageDesc->rowPitch == 0) {
@@ -717,10 +716,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
717716
UR_CHECK_ERROR(cuMemcpy2DAsync(&cpy_desc, Stream));
718717
} else if (pDstImageDesc->type == UR_MEM_TYPE_IMAGE3D) {
719718
CUDA_MEMCPY3D cpy_desc = {};
720-
cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x * PixelSizeBytes;
719+
cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x;
721720
cpy_desc.srcY = pCopyRegion->srcOffset.y;
722721
cpy_desc.srcZ = pCopyRegion->srcOffset.z;
723-
cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes;
722+
cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x;
724723
cpy_desc.dstY = pCopyRegion->dstOffset.y;
725724
cpy_desc.dstZ = pCopyRegion->dstOffset.z;
726725
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST;
@@ -729,18 +728,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
729728
cpy_desc.srcHeight = pSrcImageDesc->height;
730729
cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
731730
cpy_desc.dstArray = (CUarray)pDst;
732-
cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width;
731+
cpy_desc.WidthInBytes = pCopyRegion->copyExtent.width;
733732
cpy_desc.Height = pCopyRegion->copyExtent.height;
734733
cpy_desc.Depth = pCopyRegion->copyExtent.depth;
735734
UR_CHECK_ERROR(cuMemcpy3DAsync(&cpy_desc, Stream));
736735
} else if (pDstImageDesc->type == UR_MEM_TYPE_IMAGE1D_ARRAY ||
737736
pDstImageDesc->type == UR_MEM_TYPE_IMAGE2D_ARRAY ||
738737
pDstImageDesc->type == UR_MEM_TYPE_IMAGE_CUBEMAP_EXP) {
739738
CUDA_MEMCPY3D cpy_desc = {};
740-
cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x * PixelSizeBytes;
739+
cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x;
741740
cpy_desc.srcY = pCopyRegion->srcOffset.y;
742741
cpy_desc.srcZ = pCopyRegion->srcOffset.z;
743-
cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes;
742+
cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x;
744743
cpy_desc.dstY = pCopyRegion->dstOffset.y;
745744
cpy_desc.dstZ = pCopyRegion->dstOffset.z;
746745
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST;
@@ -749,7 +748,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
749748
cpy_desc.srcHeight = std::max(uint64_t{1}, pSrcImageDesc->height);
750749
cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
751750
cpy_desc.dstArray = (CUarray)pDst;
752-
cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width;
751+
cpy_desc.WidthInBytes = pCopyRegion->copyExtent.width;
753752
cpy_desc.Height = std::max(uint64_t{1}, pCopyRegion->copyExtent.height);
754753
cpy_desc.Depth = pCopyRegion->copyExtent.depth;
755754
UR_CHECK_ERROR(cuMemcpy3DAsync(&cpy_desc, Stream));
@@ -764,20 +763,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
764763
cuPointerGetAttribute(&memType, CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
765764
(CUdeviceptr)pSrc) != CUDA_SUCCESS;
766765

767-
size_t CopyExtentBytes = PixelSizeBytes * pCopyRegion->copyExtent.width;
768-
void *DstWithOffset =
769-
static_cast<void *>(static_cast<char *>(pDst) +
770-
(PixelSizeBytes * pCopyRegion->dstOffset.x));
766+
size_t CopyExtentBytes = pCopyRegion->copyExtent.width;
767+
void *DstWithOffset = static_cast<void *>(static_cast<char *>(pDst) +
768+
pCopyRegion->dstOffset.x);
771769

772770
if (isCudaArray) {
773-
UR_CHECK_ERROR(
774-
cuMemcpyAtoHAsync(DstWithOffset, as_CUArray(pSrc),
775-
PixelSizeBytes * pCopyRegion->srcOffset.x,
776-
CopyExtentBytes, Stream));
771+
UR_CHECK_ERROR(cuMemcpyAtoHAsync(DstWithOffset, as_CUArray(pSrc),
772+
pCopyRegion->srcOffset.x,
773+
CopyExtentBytes, Stream));
777774
} else if (memType == CU_MEMORYTYPE_DEVICE) {
778775
const char *SrcWithOffset =
779-
static_cast<const char *>(pSrc) +
780-
(pCopyRegion->srcOffset.x * PixelSizeBytes);
776+
static_cast<const char *>(pSrc) + pCopyRegion->srcOffset.x;
781777
UR_CHECK_ERROR(cuMemcpyDtoHAsync(DstWithOffset,
782778
(CUdeviceptr)SrcWithOffset,
783779
CopyExtentBytes, Stream));
@@ -787,11 +783,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
787783
}
788784
} else if (pSrcImageDesc->type == UR_MEM_TYPE_IMAGE2D) {
789785
CUDA_MEMCPY2D cpy_desc = {};
790-
cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x * PixelSizeBytes;
786+
cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x;
791787
cpy_desc.srcY = pCopyRegion->srcOffset.y;
792-
cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes;
788+
cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x;
793789
cpy_desc.dstY = pCopyRegion->dstOffset.y;
794-
cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width;
790+
cpy_desc.WidthInBytes = pCopyRegion->copyExtent.width;
795791
cpy_desc.Height = pCopyRegion->copyExtent.height;
796792
cpy_desc.dstPitch = pDstImageDesc->rowPitch;
797793
cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST;
@@ -808,10 +804,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
808804
UR_CHECK_ERROR(cuMemcpy2DAsync(&cpy_desc, Stream));
809805
} else if (pSrcImageDesc->type == UR_MEM_TYPE_IMAGE3D) {
810806
CUDA_MEMCPY3D cpy_desc = {};
811-
cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x * PixelSizeBytes;
807+
cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x;
812808
cpy_desc.srcY = pCopyRegion->srcOffset.y;
813809
cpy_desc.srcZ = pCopyRegion->srcOffset.z;
814-
cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes;
810+
cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x;
815811
cpy_desc.dstY = pCopyRegion->dstOffset.y;
816812
cpy_desc.dstZ = pCopyRegion->dstOffset.z;
817813
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
@@ -820,18 +816,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
820816
cpy_desc.dstHost = pDst;
821817
cpy_desc.dstPitch = pDstImageDesc->rowPitch;
822818
cpy_desc.dstHeight = pDstImageDesc->height;
823-
cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width;
819+
cpy_desc.WidthInBytes = pCopyRegion->copyExtent.width;
824820
cpy_desc.Height = pCopyRegion->copyExtent.height;
825821
cpy_desc.Depth = pCopyRegion->copyExtent.depth;
826822
UR_CHECK_ERROR(cuMemcpy3DAsync(&cpy_desc, Stream));
827823
} else if (pSrcImageDesc->type == UR_MEM_TYPE_IMAGE1D_ARRAY ||
828824
pSrcImageDesc->type == UR_MEM_TYPE_IMAGE2D_ARRAY ||
829825
pSrcImageDesc->type == UR_MEM_TYPE_IMAGE_CUBEMAP_EXP) {
830826
CUDA_MEMCPY3D cpy_desc = {};
831-
cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x * PixelSizeBytes;
827+
cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x;
832828
cpy_desc.srcY = pCopyRegion->srcOffset.y;
833829
cpy_desc.srcZ = pCopyRegion->srcOffset.z;
834-
cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes;
830+
cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x;
835831
cpy_desc.dstY = pCopyRegion->dstOffset.y;
836832
cpy_desc.dstZ = pCopyRegion->dstOffset.z;
837833
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
@@ -840,7 +836,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
840836
cpy_desc.dstHost = pDst;
841837
cpy_desc.dstPitch = pDstImageDesc->rowPitch;
842838
cpy_desc.dstHeight = std::max(uint64_t{1}, pDstImageDesc->height);
843-
cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width;
839+
cpy_desc.WidthInBytes = pCopyRegion->copyExtent.width;
844840
cpy_desc.Height = std::max(uint64_t{1}, pCopyRegion->copyExtent.height);
845841
cpy_desc.Depth = pCopyRegion->copyExtent.depth;
846842
UR_CHECK_ERROR(cuMemcpy3DAsync(&cpy_desc, Stream));
@@ -874,11 +870,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
874870
(CUdeviceptr)pDst) != CUDA_SUCCESS;
875871

876872
CUDA_MEMCPY2D cpy_desc = {};
877-
cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x * PixelSizeBytes;
873+
cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x;
878874
cpy_desc.srcY = 0;
879-
cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes;
875+
cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x;
880876
cpy_desc.dstY = 0;
881-
cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width;
877+
cpy_desc.WidthInBytes = pCopyRegion->copyExtent.width;
882878
cpy_desc.Height = 1;
883879
if (isSrcCudaArray) {
884880
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
@@ -897,11 +893,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
897893
UR_CHECK_ERROR(cuMemcpy2DAsync(&cpy_desc, Stream));
898894
} else if (pSrcImageDesc->type == UR_MEM_TYPE_IMAGE2D) {
899895
CUDA_MEMCPY2D cpy_desc = {};
900-
cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x * PixelSizeBytes;
896+
cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x;
901897
cpy_desc.srcY = pCopyRegion->srcOffset.y;
902-
cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes;
898+
cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x;
903899
cpy_desc.dstY = pCopyRegion->dstOffset.y;
904-
cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width;
900+
cpy_desc.WidthInBytes = pCopyRegion->copyExtent.width;
905901
cpy_desc.Height = pCopyRegion->copyExtent.height;
906902
if (pSrcImageDesc->rowPitch == 0) {
907903
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
@@ -924,35 +920,35 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
924920
UR_CHECK_ERROR(cuMemcpy2DAsync(&cpy_desc, Stream));
925921
} else if (pSrcImageDesc->type == UR_MEM_TYPE_IMAGE3D) {
926922
CUDA_MEMCPY3D cpy_desc = {};
927-
cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x * PixelSizeBytes;
923+
cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x;
928924
cpy_desc.srcY = pCopyRegion->srcOffset.y;
929925
cpy_desc.srcZ = pCopyRegion->srcOffset.z;
930-
cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes;
926+
cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x;
931927
cpy_desc.dstY = pCopyRegion->dstOffset.y;
932928
cpy_desc.dstZ = pCopyRegion->dstOffset.z;
933929
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
934930
cpy_desc.srcArray = as_CUArray(pSrc);
935931
cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
936932
cpy_desc.dstArray = (CUarray)pDst;
937-
cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width;
933+
cpy_desc.WidthInBytes = pCopyRegion->copyExtent.width;
938934
cpy_desc.Height = pCopyRegion->copyExtent.height;
939935
cpy_desc.Depth = pCopyRegion->copyExtent.depth;
940936
UR_CHECK_ERROR(cuMemcpy3DAsync(&cpy_desc, Stream));
941937
} else if (pSrcImageDesc->type == UR_MEM_TYPE_IMAGE1D_ARRAY ||
942938
pSrcImageDesc->type == UR_MEM_TYPE_IMAGE2D_ARRAY ||
943939
pSrcImageDesc->type == UR_MEM_TYPE_IMAGE_CUBEMAP_EXP) {
944940
CUDA_MEMCPY3D cpy_desc = {};
945-
cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x * PixelSizeBytes;
941+
cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x;
946942
cpy_desc.srcY = pCopyRegion->srcOffset.y;
947943
cpy_desc.srcZ = pCopyRegion->srcOffset.z;
948-
cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes;
944+
cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x;
949945
cpy_desc.dstY = pCopyRegion->dstOffset.y;
950946
cpy_desc.dstZ = pCopyRegion->dstOffset.z;
951947
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
952948
cpy_desc.srcArray = as_CUArray(pSrc);
953949
cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
954950
cpy_desc.dstArray = (CUarray)pDst;
955-
cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width;
951+
cpy_desc.WidthInBytes = pCopyRegion->copyExtent.width;
956952
cpy_desc.Height = std::max(uint64_t{1}, pCopyRegion->copyExtent.height);
957953
cpy_desc.Depth = pCopyRegion->copyExtent.depth;
958954
UR_CHECK_ERROR(cuMemcpy3DAsync(&cpy_desc, Stream));

0 commit comments

Comments
 (0)