@@ -633,6 +633,36 @@ run_ucx_perftest_with_daemon() {
633
633
done
634
634
}
635
635
636
#
# Run UCX performance cuda device test
#
# Runs ucx_perftest in client/server mode over the batch file
# $ucx_inst/share/ucx/perftest/test_types_ucp_device_cuda.
#
# The test is skipped (returning 0) when any prerequisite is missing:
#   - $have_cuda is not "yes"
#   - has_gpunetio_devel reports failure (DOCA not available)
#   - get_num_gpus reports 0
#
# Globals read:    have_cuda, ucx_inst
# Globals written: ucx_inst_ptest, ucx_perftest, ucp_test_args, ucp_client_args
#                  (deliberately not 'local': other parts of the script may
#                  read them - TODO confirm and localize if not)
# Returns: 0 when skipped, otherwise the status of run_client_server_app.
#
run_ucx_perftest_cuda_device() {
    if [ "X$have_cuda" != "Xyes" ]; then
        echo "==== CUDA not available, skipping CUDA device tests ===="
        return 0
    fi

    if ! has_gpunetio_devel; then
        echo "==== DOCA not available, skipping CUDA device tests ===="
        return 0
    fi

    if [ "$(get_num_gpus)" -eq 0 ]; then
        echo "==== No NVIDIA GPUs found, skipping CUDA device tests ===="
        return 0
    fi

    echo "==== Running ucx_perftest with cuda kernel ===="
    # Path pieces must be joined without whitespace: "var=value /path" would
    # be parsed as a one-shot environment assignment followed by a command.
    ucx_inst_ptest="$ucx_inst/share/ucx/perftest"
    ucx_perftest="$ucx_inst/bin/ucx_perftest"
    ucp_test_args="-b $ucx_inst_ptest/test_types_ucp_device_cuda"

    # TODO: Run on all GPUs
    ucp_client_args="-a cuda $(hostname)"

    # NOTE(review): the meaning of the two trailing "0 0" arguments is defined
    # by run_client_server_app, which is outside this block - confirm there.
    run_client_server_app "$ucx_perftest" "$ucp_test_args" "$ucp_client_args" 0 0
}
665
+
636
666
#
637
667
# Test malloc hooks with mpi
638
668
#
@@ -1208,6 +1238,7 @@ run_tests() {
1208
1238
do_distributed_task 3 4 run_ucp_client_server
1209
1239
do_distributed_task 0 4 test_no_cuda_context
1210
1240
do_distributed_task 1 4 run_ucx_perftest_with_daemon
1241
+ do_distributed_task 1 4 run_ucx_perftest_cuda_device
1211
1242
1212
1243
# long devel tests
1213
1244
do_distributed_task 0 4 run_ucp_hello
0 commit comments