Skip to content
Merged
1 change: 1 addition & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ Manjunath Gorentla Venkata <[email protected]>
Marek Schimara <[email protected]>
Mark Allen <[email protected]>
Matthew Baker <[email protected]>
Michael Braverman <[email protected]>
Michal Shalev <[email protected]>
Mike Dubman <[email protected]>
Mikhail Brinskii <[email protected]>
Expand Down
6 changes: 6 additions & 0 deletions buildlib/az-distro-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,12 @@ jobs:
ubuntu24_cuda12_${{ parameters.arch }}:
build_container: ubuntu24_cuda12_${{ parameters.arch }}
artifact_name: $(POSTFIX)-ubuntu24.04-mofed5-cuda12-${{ parameters.arch }}.tar.bz2
ubuntu22_cuda13_${{ parameters.arch }}:
build_container: ubuntu22_cuda13_${{ parameters.arch }}
artifact_name: $(POSTFIX)-ubuntu22.04-mofed5-cuda13-${{ parameters.arch }}.tar.bz2
ubuntu24_cuda13_${{ parameters.arch }}:
build_container: ubuntu24_cuda13_${{ parameters.arch }}
artifact_name: $(POSTFIX)-ubuntu24.04-mofed5-cuda13-${{ parameters.arch }}.tar.bz2
# x86 only
${{ if eq(parameters.arch, 'x86_64') }}:
centos7_cuda11_${{ parameters.arch }}:
Expand Down
5 changes: 2 additions & 3 deletions buildlib/az-helpers.sh
Original file line number Diff line number Diff line change
Expand Up @@ -199,15 +199,14 @@ try_load_cuda_env() {
have_cuda="${cuda_local_dir}"
else
# Fallback to env module
az_module_load dev/cuda12.8 || return 0
az_module_load dev/cuda13.0.0 || return 0
have_cuda=yes
fi

# Check gdrcopy
if [ -w "/dev/gdrdrv" ]
then
# TODO detect cuda version if using local CUDA
az_module_load dev/gdrcopy2.4.4_cuda12.8.0 && have_gdrcopy=yes
az_module_load dev/gdrcopy2.5.1_cuda13.0.0 && have_gdrcopy=yes
fi
}

Expand Down
8 changes: 8 additions & 0 deletions buildlib/azure-pipelines-release-drp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ resources:
image: $(REPO_MIRROR)/ucx/x86_64/ubuntu20.04-mofed5-cuda12:3
- container: ubuntu24_cuda12_x86_64
image: $(REPO_MIRROR)/ucx/x86_64/ubuntu24.04-mofed24.10-cuda12.5:1
- container: ubuntu22_cuda13_x86_64
image: $(REPO_MIRROR)/ucx/x86_64/ubuntu22.04-mofed5-cuda13:1
- container: ubuntu24_cuda13_x86_64
image: $(REPO_MIRROR)/ucx/x86_64/ubuntu24.04-mofed24.10-cuda13:1

# aarch64
- container: centos8_cuda11_aarch64
Expand All @@ -61,6 +65,10 @@ resources:
image: $(REPO_MIRROR)/ucx/aarch64/ubuntu22.04-mofed5-cuda12:3
- container: ubuntu24_cuda12_aarch64
image: $(REPO_MIRROR)/ucx/aarch64/ubuntu24.04-mofed24.10-cuda12.5:1
- container: ubuntu22_cuda13_aarch64
image: $(REPO_MIRROR)/ucx/aarch64/ubuntu22.04-mofed5-cuda13:1
- container: ubuntu24_cuda13_aarch64
image: $(REPO_MIRROR)/ucx/aarch64/ubuntu24.04-mofed24.10-cuda13:1

stages:
- stage: Prepare
Expand Down
8 changes: 8 additions & 0 deletions buildlib/azure-pipelines-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ resources:
image: rdmz-harbor.rdmz.labs.mlnx/ucx/x86_64/ubuntu22.04-mofed5-cuda12:3
- container: ubuntu24_cuda12_x86_64
image: rdmz-harbor.rdmz.labs.mlnx/ucx/x86_64/ubuntu24.04-mofed24.10-cuda12.5:1
- container: ubuntu22_cuda13_x86_64
image: rdmz-harbor.rdmz.labs.mlnx/ucx/x86_64/ubuntu22.04-mofed5-cuda13:1
- container: ubuntu24_cuda13_x86_64
image: rdmz-harbor.rdmz.labs.mlnx/ucx/x86_64/ubuntu24.04-mofed24.10-cuda13:1

# aarch64
- container: centos8_cuda11_aarch64
Expand All @@ -57,6 +61,10 @@ resources:
image: rdmz-harbor.rdmz.labs.mlnx/ucx/aarch64/ubuntu22.04-mofed5-cuda12:3
- container: ubuntu24_cuda12_aarch64
image: rdmz-harbor.rdmz.labs.mlnx/ucx/aarch64/ubuntu24.04-mofed24.10-cuda12.5:1
- container: ubuntu22_cuda13_aarch64
image: rdmz-harbor.rdmz.labs.mlnx/ucx/aarch64/ubuntu22.04-mofed5-cuda13:1
- container: ubuntu24_cuda13_aarch64
image: rdmz-harbor.rdmz.labs.mlnx/ucx/aarch64/ubuntu24.04-mofed24.10-cuda13:1

stages:
- stage: Prepare
Expand Down
25 changes: 25 additions & 0 deletions buildlib/dockers/docker-compose-aarch64.yml
Original file line number Diff line number Diff line change
Expand Up @@ -93,3 +93,28 @@ services:
CUDA_VERSION: 12.5.1
NV_DRIVER_VERSION: 555
ARCH: aarch64

# Compose service that builds the Ubuntu 22.04 / CUDA 13.0.0 release image
# for aarch64. The result is tagged ubuntu22.04-mofed5-cuda13:1.
ubuntu22.04-mofed5-cuda13:
image: ubuntu22.04-mofed5-cuda13:1
build:
context: .
# Build steps run on the host network.
network: host
dockerfile: ubuntu-release.Dockerfile
# Version pins handed to ubuntu-release.Dockerfile as build arguments.
args:
MOFED_VERSION: 5.8-7.0.6.1
UBUNTU_VERSION: 22.04
CUDA_VERSION: 13.0.0
NV_DRIVER_VERSION: 580
ARCH: aarch64
# Compose service that builds the Ubuntu 24.04 / CUDA 13.0.0 release image
# for aarch64. The result is tagged ubuntu24.04-mofed24.10-cuda13:1.
# NOTE(review): the service key says "mofed5" while the image tag and
# MOFED_VERSION below are 24.10 -- presumably kept for naming parity with
# the "-mofed5-" release artifact names; confirm this is intentional.
ubuntu24.04-mofed5-cuda13:
image: ubuntu24.04-mofed24.10-cuda13:1
build:
context: .
# Build steps run on the host network.
network: host
dockerfile: ubuntu-release.Dockerfile
# Version pins handed to ubuntu-release.Dockerfile as build arguments.
args:
MOFED_VERSION: 24.10-3.2.5.0
UBUNTU_VERSION: 24.04
CUDA_VERSION: 13.0.0
NV_DRIVER_VERSION: 580
ARCH: aarch64
26 changes: 26 additions & 0 deletions buildlib/dockers/docker-compose-x86_64.yml
Original file line number Diff line number Diff line change
Expand Up @@ -129,3 +129,29 @@ services:
CUDA_VERSION: 12.5.1
NV_DRIVER_VERSION: 555
ARCH: x86_64


# Compose service that builds the Ubuntu 22.04 / CUDA 13.0.0 release image
# for x86_64. The result is tagged ubuntu22.04-mofed5-cuda13:1.
ubuntu22.04-mofed5-cuda13:
image: ubuntu22.04-mofed5-cuda13:1
build:
context: .
# Build steps run on the host network.
network: host
dockerfile: ubuntu-release.Dockerfile
# Version pins handed to ubuntu-release.Dockerfile as build arguments.
args:
MOFED_VERSION: 5.8-7.0.6.1
UBUNTU_VERSION: 22.04
CUDA_VERSION: 13.0.0
NV_DRIVER_VERSION: 580
ARCH: x86_64
# Compose service that builds the Ubuntu 24.04 / CUDA 13.0.0 release image
# for x86_64. The result is tagged ubuntu24.04-mofed24.10-cuda13:1.
# NOTE(review): the service key says "mofed5" while the image tag and
# MOFED_VERSION below are 24.10 -- presumably kept for naming parity with
# the "-mofed5-" release artifact names; confirm this is intentional.
ubuntu24.04-mofed5-cuda13:
image: ubuntu24.04-mofed24.10-cuda13:1
build:
context: .
# Build steps run on the host network.
network: host
dockerfile: ubuntu-release.Dockerfile
# Version pins handed to ubuntu-release.Dockerfile as build arguments.
args:
MOFED_VERSION: 24.10-3.2.5.0
UBUNTU_VERSION: 24.04
CUDA_VERSION: 13.0.0
NV_DRIVER_VERSION: 580
ARCH: x86_64
4 changes: 4 additions & 0 deletions buildlib/pr/cuda/cuda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,10 @@ jobs:
CONTAINER: ubuntu22_cuda_12_0
ubuntu22_cuda_12_1:
CONTAINER: ubuntu22_cuda_12_1
ubuntu22_cuda_13_0:
CONTAINER: ubuntu22_cuda_13_0
ubuntu24_cuda_13_0:
CONTAINER: ubuntu24_cuda_13_0

container: $[ variables['CONTAINER'] ]
timeoutInMinutes: 35
Expand Down
12 changes: 12 additions & 0 deletions buildlib/pr/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,12 @@ resources:
- container: ubuntu22_cuda_12_1
image: nvidia/cuda:12.1.0-devel-ubuntu22.04
options: $(DOCKER_OPT_ARGS) $(DOCKER_OPT_GPU)
- container: ubuntu22_cuda_13_0
image: nvidia/cuda:13.0.0-devel-ubuntu22.04
options: $(DOCKER_OPT_ARGS) $(DOCKER_OPT_GPU)
- container: ubuntu24_cuda_13_0
image: nvidia/cuda:13.0.0-devel-ubuntu24.04
options: $(DOCKER_OPT_ARGS) $(DOCKER_OPT_GPU)
- container: centos8_cuda11
image: rdmz-harbor.rdmz.labs.mlnx/ucx/centos8-mofed5-cuda11:1
options: $(DOCKER_OPT_ARGS) $(DOCKER_OPT_VOLUMES) $(DOCKER_OPT_GPU)
Expand All @@ -186,6 +192,12 @@ resources:
- container: ubuntu22_cuda12
image: rdmz-harbor.rdmz.labs.mlnx/ucx/x86_64/ubuntu22.04-mofed5-cuda12:3
options: $(DOCKER_OPT_ARGS) $(DOCKER_OPT_VOLUMES) $(DOCKER_OPT_GPU)
- container: ubuntu22_cuda13
image: rdmz-harbor.rdmz.labs.mlnx/ucx/x86_64/ubuntu22.04-mofed5-cuda13:1
options: $(DOCKER_OPT_ARGS) $(DOCKER_OPT_VOLUMES) $(DOCKER_OPT_GPU)
- container: ubuntu24_cuda13
image: rdmz-harbor.rdmz.labs.mlnx/ucx/x86_64/ubuntu24.04-mofed24.10-cuda13:1
options: $(DOCKER_OPT_ARGS) $(DOCKER_OPT_VOLUMES) $(DOCKER_OPT_GPU)
- container: ubuntu2204_rocm_6_0_0
image: rdmz-harbor.rdmz.labs.mlnx/ucx/x86_64/ubuntu2204:rocm-6.0.0
options: $(DOCKER_OPT_ARGS) $(DOCKER_OPT_VOLUMES)
Expand Down
4 changes: 2 additions & 2 deletions buildlib/tools/common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ WORKSPACE=${WORKSPACE:=$PWD}
# build in local directory which goes away when docker exits
ucx_build_dir=$HOME/${BUILD_ID}/build
ucx_inst=$ucx_build_dir/install
CUDA_MODULE="dev/cuda12.8"
GDRCOPY_MODULE="dev/gdrcopy2.4.4_cuda12.8.0"
CUDA_MODULE="dev/cuda13.0.0"
GDRCOPY_MODULE="dev/gdrcopy2.5.1_cuda13.0.0"
JDK_MODULE="dev/jdk"
MVN_MODULE="dev/mvn"
XPMEM_MODULE="dev/xpmem-90a95a4"
Expand Down
2 changes: 1 addition & 1 deletion buildlib/tools/perf-common.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ steps:

case "${{ parameters.Name }}" in
"Build-UCX")
module="/hpc/local/etc/modulefiles/dev/cuda12.8"
module="/hpc/local/etc/modulefiles/dev/cuda13.0.0"
perfxParams=(--skip-run --source-branch $(Build.SourceBranch) --omb-cuda)
;;
"Perf-test-multi-node")
Expand Down
4 changes: 4 additions & 0 deletions test/gtest/uct/cuda/test_switch_cuda_device.cc
Original file line number Diff line number Diff line change
Expand Up @@ -596,6 +596,10 @@ class test_p2p_send_on_diff_device : public uct_p2p_test {
protected:
void init() override
{
if (!mem_buffer::is_mem_type_supported(UCS_MEMORY_TYPE_CUDA)) {
UCS_TEST_SKIP_R("CUDA is not supported");
}

ASSERT_EQ(cudaGetDeviceCount(&m_num_devices), cudaSuccess);
if (m_num_devices < 2) {
UCS_TEST_SKIP_R("less than two cuda devices available");
Expand Down
Loading