@@ -645,7 +645,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
645
645
};
646
646
647
647
unsigned int NumChannels = 0 ;
648
- size_t PixelSizeBytes = 0 ;
648
+ [[maybe_unused]] size_t PixelSizeBytes = 0 ;
649
649
650
650
UR_CALL (urCalculateNumChannels (pSrcImageFormat->channelOrder , &NumChannels));
651
651
@@ -673,19 +673,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
673
673
cuPointerGetAttribute (&memType, CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
674
674
(CUdeviceptr)pDst) != CUDA_SUCCESS;
675
675
676
- size_t CopyExtentBytes = PixelSizeBytes * pCopyRegion->copyExtent .width ;
677
- const char *SrcWithOffset = static_cast < const char *>(pSrc) +
678
- ( pCopyRegion->srcOffset .x * PixelSizeBytes) ;
676
+ size_t CopyExtentBytes = pCopyRegion->copyExtent .width ;
677
+ const char *SrcWithOffset =
678
+ static_cast < const char *>(pSrc) + pCopyRegion->srcOffset .x ;
679
679
680
680
if (isCudaArray) {
681
- UR_CHECK_ERROR (cuMemcpyHtoAAsync (
682
- ( CUarray)pDst, pCopyRegion->dstOffset .x * PixelSizeBytes ,
683
- static_cast <const void *>(SrcWithOffset), CopyExtentBytes ,
684
- Stream));
681
+ UR_CHECK_ERROR (
682
+ cuMemcpyHtoAAsync (( CUarray)pDst, pCopyRegion->dstOffset .x ,
683
+ static_cast <const void *>(SrcWithOffset),
684
+ CopyExtentBytes, Stream));
685
685
} else if (memType == CU_MEMORYTYPE_DEVICE) {
686
- void *DstWithOffset =
687
- static_cast <void *>(static_cast <char *>(pDst) +
688
- (PixelSizeBytes * pCopyRegion->dstOffset .x ));
686
+ void *DstWithOffset = static_cast <void *>(static_cast <char *>(pDst) +
687
+ pCopyRegion->dstOffset .x );
689
688
UR_CHECK_ERROR (
690
689
cuMemcpyHtoDAsync ((CUdeviceptr)DstWithOffset,
691
690
static_cast <const void *>(SrcWithOffset),
@@ -698,11 +697,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
698
697
CUDA_MEMCPY2D cpy_desc = {};
699
698
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST;
700
699
cpy_desc.srcHost = pSrc;
701
- cpy_desc.srcXInBytes = pCopyRegion->srcOffset .x * PixelSizeBytes ;
700
+ cpy_desc.srcXInBytes = pCopyRegion->srcOffset .x ;
702
701
cpy_desc.srcY = pCopyRegion->srcOffset .y ;
703
- cpy_desc.dstXInBytes = pCopyRegion->dstOffset .x * PixelSizeBytes ;
702
+ cpy_desc.dstXInBytes = pCopyRegion->dstOffset .x ;
704
703
cpy_desc.dstY = pCopyRegion->dstOffset .y ;
705
- cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent .width ;
704
+ cpy_desc.WidthInBytes = pCopyRegion->copyExtent .width ;
706
705
cpy_desc.Height = pCopyRegion->copyExtent .height ;
707
706
cpy_desc.srcPitch = pSrcImageDesc->rowPitch ;
708
707
if (pDstImageDesc->rowPitch == 0 ) {
@@ -717,10 +716,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
717
716
UR_CHECK_ERROR (cuMemcpy2DAsync (&cpy_desc, Stream));
718
717
} else if (pDstImageDesc->type == UR_MEM_TYPE_IMAGE3D) {
719
718
CUDA_MEMCPY3D cpy_desc = {};
720
- cpy_desc.srcXInBytes = pCopyRegion->srcOffset .x * PixelSizeBytes ;
719
+ cpy_desc.srcXInBytes = pCopyRegion->srcOffset .x ;
721
720
cpy_desc.srcY = pCopyRegion->srcOffset .y ;
722
721
cpy_desc.srcZ = pCopyRegion->srcOffset .z ;
723
- cpy_desc.dstXInBytes = pCopyRegion->dstOffset .x * PixelSizeBytes ;
722
+ cpy_desc.dstXInBytes = pCopyRegion->dstOffset .x ;
724
723
cpy_desc.dstY = pCopyRegion->dstOffset .y ;
725
724
cpy_desc.dstZ = pCopyRegion->dstOffset .z ;
726
725
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST;
@@ -729,18 +728,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
729
728
cpy_desc.srcHeight = pSrcImageDesc->height ;
730
729
cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
731
730
cpy_desc.dstArray = (CUarray)pDst;
732
- cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent .width ;
731
+ cpy_desc.WidthInBytes = pCopyRegion->copyExtent .width ;
733
732
cpy_desc.Height = pCopyRegion->copyExtent .height ;
734
733
cpy_desc.Depth = pCopyRegion->copyExtent .depth ;
735
734
UR_CHECK_ERROR (cuMemcpy3DAsync (&cpy_desc, Stream));
736
735
} else if (pDstImageDesc->type == UR_MEM_TYPE_IMAGE1D_ARRAY ||
737
736
pDstImageDesc->type == UR_MEM_TYPE_IMAGE2D_ARRAY ||
738
737
pDstImageDesc->type == UR_MEM_TYPE_IMAGE_CUBEMAP_EXP) {
739
738
CUDA_MEMCPY3D cpy_desc = {};
740
- cpy_desc.srcXInBytes = pCopyRegion->srcOffset .x * PixelSizeBytes ;
739
+ cpy_desc.srcXInBytes = pCopyRegion->srcOffset .x ;
741
740
cpy_desc.srcY = pCopyRegion->srcOffset .y ;
742
741
cpy_desc.srcZ = pCopyRegion->srcOffset .z ;
743
- cpy_desc.dstXInBytes = pCopyRegion->dstOffset .x * PixelSizeBytes ;
742
+ cpy_desc.dstXInBytes = pCopyRegion->dstOffset .x ;
744
743
cpy_desc.dstY = pCopyRegion->dstOffset .y ;
745
744
cpy_desc.dstZ = pCopyRegion->dstOffset .z ;
746
745
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST;
@@ -749,7 +748,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
749
748
cpy_desc.srcHeight = std::max (uint64_t {1 }, pSrcImageDesc->height );
750
749
cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
751
750
cpy_desc.dstArray = (CUarray)pDst;
752
- cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent .width ;
751
+ cpy_desc.WidthInBytes = pCopyRegion->copyExtent .width ;
753
752
cpy_desc.Height = std::max (uint64_t {1 }, pCopyRegion->copyExtent .height );
754
753
cpy_desc.Depth = pCopyRegion->copyExtent .depth ;
755
754
UR_CHECK_ERROR (cuMemcpy3DAsync (&cpy_desc, Stream));
@@ -764,20 +763,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
764
763
cuPointerGetAttribute (&memType, CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
765
764
(CUdeviceptr)pSrc) != CUDA_SUCCESS;
766
765
767
- size_t CopyExtentBytes = PixelSizeBytes * pCopyRegion->copyExtent .width ;
768
- void *DstWithOffset =
769
- static_cast <void *>(static_cast <char *>(pDst) +
770
- (PixelSizeBytes * pCopyRegion->dstOffset .x ));
766
+ size_t CopyExtentBytes = pCopyRegion->copyExtent .width ;
767
+ void *DstWithOffset = static_cast <void *>(static_cast <char *>(pDst) +
768
+ pCopyRegion->dstOffset .x );
771
769
772
770
if (isCudaArray) {
773
- UR_CHECK_ERROR (
774
- cuMemcpyAtoHAsync (DstWithOffset, as_CUArray (pSrc),
775
- PixelSizeBytes * pCopyRegion->srcOffset .x ,
776
- CopyExtentBytes, Stream));
771
+ UR_CHECK_ERROR (cuMemcpyAtoHAsync (DstWithOffset, as_CUArray (pSrc),
772
+ pCopyRegion->srcOffset .x ,
773
+ CopyExtentBytes, Stream));
777
774
} else if (memType == CU_MEMORYTYPE_DEVICE) {
778
775
const char *SrcWithOffset =
779
- static_cast <const char *>(pSrc) +
780
- (pCopyRegion->srcOffset .x * PixelSizeBytes);
776
+ static_cast <const char *>(pSrc) + pCopyRegion->srcOffset .x ;
781
777
UR_CHECK_ERROR (cuMemcpyDtoHAsync (DstWithOffset,
782
778
(CUdeviceptr)SrcWithOffset,
783
779
CopyExtentBytes, Stream));
@@ -787,11 +783,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
787
783
}
788
784
} else if (pSrcImageDesc->type == UR_MEM_TYPE_IMAGE2D) {
789
785
CUDA_MEMCPY2D cpy_desc = {};
790
- cpy_desc.srcXInBytes = pCopyRegion->srcOffset .x * PixelSizeBytes ;
786
+ cpy_desc.srcXInBytes = pCopyRegion->srcOffset .x ;
791
787
cpy_desc.srcY = pCopyRegion->srcOffset .y ;
792
- cpy_desc.dstXInBytes = pCopyRegion->dstOffset .x * PixelSizeBytes ;
788
+ cpy_desc.dstXInBytes = pCopyRegion->dstOffset .x ;
793
789
cpy_desc.dstY = pCopyRegion->dstOffset .y ;
794
- cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent .width ;
790
+ cpy_desc.WidthInBytes = pCopyRegion->copyExtent .width ;
795
791
cpy_desc.Height = pCopyRegion->copyExtent .height ;
796
792
cpy_desc.dstPitch = pDstImageDesc->rowPitch ;
797
793
cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST;
@@ -808,10 +804,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
808
804
UR_CHECK_ERROR (cuMemcpy2DAsync (&cpy_desc, Stream));
809
805
} else if (pSrcImageDesc->type == UR_MEM_TYPE_IMAGE3D) {
810
806
CUDA_MEMCPY3D cpy_desc = {};
811
- cpy_desc.srcXInBytes = pCopyRegion->srcOffset .x * PixelSizeBytes ;
807
+ cpy_desc.srcXInBytes = pCopyRegion->srcOffset .x ;
812
808
cpy_desc.srcY = pCopyRegion->srcOffset .y ;
813
809
cpy_desc.srcZ = pCopyRegion->srcOffset .z ;
814
- cpy_desc.dstXInBytes = pCopyRegion->dstOffset .x * PixelSizeBytes ;
810
+ cpy_desc.dstXInBytes = pCopyRegion->dstOffset .x ;
815
811
cpy_desc.dstY = pCopyRegion->dstOffset .y ;
816
812
cpy_desc.dstZ = pCopyRegion->dstOffset .z ;
817
813
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
@@ -820,18 +816,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
820
816
cpy_desc.dstHost = pDst;
821
817
cpy_desc.dstPitch = pDstImageDesc->rowPitch ;
822
818
cpy_desc.dstHeight = pDstImageDesc->height ;
823
- cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent .width ;
819
+ cpy_desc.WidthInBytes = pCopyRegion->copyExtent .width ;
824
820
cpy_desc.Height = pCopyRegion->copyExtent .height ;
825
821
cpy_desc.Depth = pCopyRegion->copyExtent .depth ;
826
822
UR_CHECK_ERROR (cuMemcpy3DAsync (&cpy_desc, Stream));
827
823
} else if (pSrcImageDesc->type == UR_MEM_TYPE_IMAGE1D_ARRAY ||
828
824
pSrcImageDesc->type == UR_MEM_TYPE_IMAGE2D_ARRAY ||
829
825
pSrcImageDesc->type == UR_MEM_TYPE_IMAGE_CUBEMAP_EXP) {
830
826
CUDA_MEMCPY3D cpy_desc = {};
831
- cpy_desc.srcXInBytes = pCopyRegion->srcOffset .x * PixelSizeBytes ;
827
+ cpy_desc.srcXInBytes = pCopyRegion->srcOffset .x ;
832
828
cpy_desc.srcY = pCopyRegion->srcOffset .y ;
833
829
cpy_desc.srcZ = pCopyRegion->srcOffset .z ;
834
- cpy_desc.dstXInBytes = pCopyRegion->dstOffset .x * PixelSizeBytes ;
830
+ cpy_desc.dstXInBytes = pCopyRegion->dstOffset .x ;
835
831
cpy_desc.dstY = pCopyRegion->dstOffset .y ;
836
832
cpy_desc.dstZ = pCopyRegion->dstOffset .z ;
837
833
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
@@ -840,7 +836,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
840
836
cpy_desc.dstHost = pDst;
841
837
cpy_desc.dstPitch = pDstImageDesc->rowPitch ;
842
838
cpy_desc.dstHeight = std::max (uint64_t {1 }, pDstImageDesc->height );
843
- cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent .width ;
839
+ cpy_desc.WidthInBytes = pCopyRegion->copyExtent .width ;
844
840
cpy_desc.Height = std::max (uint64_t {1 }, pCopyRegion->copyExtent .height );
845
841
cpy_desc.Depth = pCopyRegion->copyExtent .depth ;
846
842
UR_CHECK_ERROR (cuMemcpy3DAsync (&cpy_desc, Stream));
@@ -874,11 +870,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
874
870
(CUdeviceptr)pDst) != CUDA_SUCCESS;
875
871
876
872
CUDA_MEMCPY2D cpy_desc = {};
877
- cpy_desc.srcXInBytes = pCopyRegion->srcOffset .x * PixelSizeBytes ;
873
+ cpy_desc.srcXInBytes = pCopyRegion->srcOffset .x ;
878
874
cpy_desc.srcY = 0 ;
879
- cpy_desc.dstXInBytes = pCopyRegion->dstOffset .x * PixelSizeBytes ;
875
+ cpy_desc.dstXInBytes = pCopyRegion->dstOffset .x ;
880
876
cpy_desc.dstY = 0 ;
881
- cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent .width ;
877
+ cpy_desc.WidthInBytes = pCopyRegion->copyExtent .width ;
882
878
cpy_desc.Height = 1 ;
883
879
if (isSrcCudaArray) {
884
880
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
@@ -897,11 +893,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
897
893
UR_CHECK_ERROR (cuMemcpy2DAsync (&cpy_desc, Stream));
898
894
} else if (pSrcImageDesc->type == UR_MEM_TYPE_IMAGE2D) {
899
895
CUDA_MEMCPY2D cpy_desc = {};
900
- cpy_desc.srcXInBytes = pCopyRegion->srcOffset .x * PixelSizeBytes ;
896
+ cpy_desc.srcXInBytes = pCopyRegion->srcOffset .x ;
901
897
cpy_desc.srcY = pCopyRegion->srcOffset .y ;
902
- cpy_desc.dstXInBytes = pCopyRegion->dstOffset .x * PixelSizeBytes ;
898
+ cpy_desc.dstXInBytes = pCopyRegion->dstOffset .x ;
903
899
cpy_desc.dstY = pCopyRegion->dstOffset .y ;
904
- cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent .width ;
900
+ cpy_desc.WidthInBytes = pCopyRegion->copyExtent .width ;
905
901
cpy_desc.Height = pCopyRegion->copyExtent .height ;
906
902
if (pSrcImageDesc->rowPitch == 0 ) {
907
903
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
@@ -924,35 +920,35 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
924
920
UR_CHECK_ERROR (cuMemcpy2DAsync (&cpy_desc, Stream));
925
921
} else if (pSrcImageDesc->type == UR_MEM_TYPE_IMAGE3D) {
926
922
CUDA_MEMCPY3D cpy_desc = {};
927
- cpy_desc.srcXInBytes = pCopyRegion->srcOffset .x * PixelSizeBytes ;
923
+ cpy_desc.srcXInBytes = pCopyRegion->srcOffset .x ;
928
924
cpy_desc.srcY = pCopyRegion->srcOffset .y ;
929
925
cpy_desc.srcZ = pCopyRegion->srcOffset .z ;
930
- cpy_desc.dstXInBytes = pCopyRegion->dstOffset .x * PixelSizeBytes ;
926
+ cpy_desc.dstXInBytes = pCopyRegion->dstOffset .x ;
931
927
cpy_desc.dstY = pCopyRegion->dstOffset .y ;
932
928
cpy_desc.dstZ = pCopyRegion->dstOffset .z ;
933
929
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
934
930
cpy_desc.srcArray = as_CUArray (pSrc);
935
931
cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
936
932
cpy_desc.dstArray = (CUarray)pDst;
937
- cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent .width ;
933
+ cpy_desc.WidthInBytes = pCopyRegion->copyExtent .width ;
938
934
cpy_desc.Height = pCopyRegion->copyExtent .height ;
939
935
cpy_desc.Depth = pCopyRegion->copyExtent .depth ;
940
936
UR_CHECK_ERROR (cuMemcpy3DAsync (&cpy_desc, Stream));
941
937
} else if (pSrcImageDesc->type == UR_MEM_TYPE_IMAGE1D_ARRAY ||
942
938
pSrcImageDesc->type == UR_MEM_TYPE_IMAGE2D_ARRAY ||
943
939
pSrcImageDesc->type == UR_MEM_TYPE_IMAGE_CUBEMAP_EXP) {
944
940
CUDA_MEMCPY3D cpy_desc = {};
945
- cpy_desc.srcXInBytes = pCopyRegion->srcOffset .x * PixelSizeBytes ;
941
+ cpy_desc.srcXInBytes = pCopyRegion->srcOffset .x ;
946
942
cpy_desc.srcY = pCopyRegion->srcOffset .y ;
947
943
cpy_desc.srcZ = pCopyRegion->srcOffset .z ;
948
- cpy_desc.dstXInBytes = pCopyRegion->dstOffset .x * PixelSizeBytes ;
944
+ cpy_desc.dstXInBytes = pCopyRegion->dstOffset .x ;
949
945
cpy_desc.dstY = pCopyRegion->dstOffset .y ;
950
946
cpy_desc.dstZ = pCopyRegion->dstOffset .z ;
951
947
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
952
948
cpy_desc.srcArray = as_CUArray (pSrc);
953
949
cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
954
950
cpy_desc.dstArray = (CUarray)pDst;
955
- cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent .width ;
951
+ cpy_desc.WidthInBytes = pCopyRegion->copyExtent .width ;
956
952
cpy_desc.Height = std::max (uint64_t {1 }, pCopyRegion->copyExtent .height );
957
953
cpy_desc.Depth = pCopyRegion->copyExtent .depth ;
958
954
UR_CHECK_ERROR (cuMemcpy3DAsync (&cpy_desc, Stream));
0 commit comments