From 19c6c028792171e69100a08f4693e49f9ec9e70d Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 2 Sep 2025 13:54:57 +0100 Subject: [PATCH 1/7] Use CUDA 13.0 on CI --- .github/workflows/docs.yaml | 2 +- .github/workflows/linux_cuda_wheel.yaml | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 7d580cef3..ac63e9d81 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -55,7 +55,7 @@ jobs: # the wheel unless the label cliflow/binaries/all is present in the # PR. python-version: ['3.10'] - cuda-version: ['12.6'] + cuda-version: ['12.8'] ffmpeg-version-for-tests: ['7'] container: image: "pytorch/manylinux2_28-builder:cuda${{ matrix.cuda-version }}" diff --git a/.github/workflows/linux_cuda_wheel.yaml b/.github/workflows/linux_cuda_wheel.yaml index 4e92a9095..d10c65d5f 100644 --- a/.github/workflows/linux_cuda_wheel.yaml +++ b/.github/workflows/linux_cuda_wheel.yaml @@ -67,10 +67,9 @@ jobs: # For the actual release we should add that label and change this to # include more python versions. python-version: ['3.10'] - # We test against 12.6 to avoid having too big of a CI matrix, + # We test against 12.6 and 13.0 to avoid having too big of a CI matrix, # but for releases we should add 12.8. - # TODO add 13.0! - cuda-version: ['12.6'] + cuda-version: ['12.6', '13.0'] # TODO: put back ffmpeg 5 https://github.com/pytorch/torchcodec/issues/325 ffmpeg-version-for-tests: ['4.4.2', '6', '7'] From a375646235f502a23134dbd718f0a34788e66d74 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 2 Sep 2025 14:54:24 +0100 Subject: [PATCH 2/7] Fix libnpp? --- .github/workflows/linux_cuda_wheel.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/linux_cuda_wheel.yaml b/.github/workflows/linux_cuda_wheel.yaml index d10c65d5f..793d2e0ab 100644 --- a/.github/workflows/linux_cuda_wheel.yaml +++ b/.github/workflows/linux_cuda_wheel.yaml @@ -94,7 +94,7 @@ jobs: python-version: ${{ matrix.python-version }} # We install conda packages at the start because otherwise conda may have conflicts with dependencies. # Note: xorg-libxau was addded to fix a problem with ffmpeg 4. We should consider removing it. - default-packages: "nvidia/label/cuda-${{ matrix.cuda-version }}.0::libnpp nvidia::cuda-nvrtc=${{ matrix.cuda-version }} nvidia::cuda-toolkit=${{ matrix.cuda-version }} nvidia::cuda-cudart=${{ matrix.cuda-version }} nvidia::cuda-driver-dev=${{ matrix.cuda-version }} conda-forge::ffmpeg=${{ matrix.ffmpeg-version-for-tests }} conda-forge::xorg-libxau" + default-packages: "nvidia/label/cuda-${{ matrix.cuda-version }}.0::libnpp=${{ matrix.cuda-version }} nvidia::cuda-nvrtc=${{ matrix.cuda-version }} nvidia::cuda-toolkit=${{ matrix.cuda-version }} nvidia::cuda-cudart=${{ matrix.cuda-version }} nvidia::cuda-driver-dev=${{ matrix.cuda-version }} conda-forge::ffmpeg=${{ matrix.ffmpeg-version-for-tests }} conda-forge::xorg-libxau" - name: Check env run: | ${CONDA_RUN} env From 089a0896ca5745417c4c90f1aa8571b347d09b07 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 2 Sep 2025 15:28:15 +0100 Subject: [PATCH 3/7] debug --- .github/workflows/linux_cuda_wheel.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/linux_cuda_wheel.yaml b/.github/workflows/linux_cuda_wheel.yaml index 793d2e0ab..dd088bd15 100644 --- a/.github/workflows/linux_cuda_wheel.yaml +++ b/.github/workflows/linux_cuda_wheel.yaml @@ -94,7 +94,7 @@ jobs: python-version: ${{ matrix.python-version }} # We install conda packages at the start because otherwise conda may have conflicts with dependencies. # Note: xorg-libxau was addded to fix a problem with ffmpeg 4. We should consider removing it. - default-packages: "nvidia/label/cuda-${{ matrix.cuda-version }}.0::libnpp=${{ matrix.cuda-version }} nvidia::cuda-nvrtc=${{ matrix.cuda-version }} nvidia::cuda-toolkit=${{ matrix.cuda-version }} nvidia::cuda-cudart=${{ matrix.cuda-version }} nvidia::cuda-driver-dev=${{ matrix.cuda-version }} conda-forge::ffmpeg=${{ matrix.ffmpeg-version-for-tests }} conda-forge::xorg-libxau" + default-packages: "nvidia/label/cuda-${{ matrix.cuda-version }}.0::libnpp nvidia/label/cuda-${{ matrix.cuda-version }}.0::cuda-version=${{ matrix.cuda-version }} nvidia::cuda-nvrtc=${{ matrix.cuda-version }} nvidia::cuda-toolkit=${{ matrix.cuda-version }} nvidia::cuda-cudart=${{ matrix.cuda-version }} nvidia::cuda-driver-dev=${{ matrix.cuda-version }} conda-forge::ffmpeg=${{ matrix.ffmpeg-version-for-tests }} conda-forge::xorg-libxau" - name: Check env run: | ${CONDA_RUN} env From 3cc77c34ef43aa27eb4d7e035825cea353a89114 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 4 Sep 2025 09:42:34 +0100 Subject: [PATCH 4/7] revert stuff --- .github/workflows/linux_cuda_wheel.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/linux_cuda_wheel.yaml b/.github/workflows/linux_cuda_wheel.yaml index dd088bd15..d10c65d5f 100644 --- a/.github/workflows/linux_cuda_wheel.yaml +++ b/.github/workflows/linux_cuda_wheel.yaml @@ -94,7 +94,7 @@ jobs: python-version: ${{ matrix.python-version }} # We install conda packages at the start because otherwise conda may have conflicts with dependencies. # Note: xorg-libxau was addded to fix a problem with ffmpeg 4. We should consider removing it. - default-packages: "nvidia/label/cuda-${{ matrix.cuda-version }}.0::libnpp nvidia/label/cuda-${{ matrix.cuda-version }}.0::cuda-version=${{ matrix.cuda-version }} nvidia::cuda-nvrtc=${{ matrix.cuda-version }} nvidia::cuda-toolkit=${{ matrix.cuda-version }} nvidia::cuda-cudart=${{ matrix.cuda-version }} nvidia::cuda-driver-dev=${{ matrix.cuda-version }} conda-forge::ffmpeg=${{ matrix.ffmpeg-version-for-tests }} conda-forge::xorg-libxau" + default-packages: "nvidia/label/cuda-${{ matrix.cuda-version }}.0::libnpp nvidia::cuda-nvrtc=${{ matrix.cuda-version }} nvidia::cuda-toolkit=${{ matrix.cuda-version }} nvidia::cuda-cudart=${{ matrix.cuda-version }} nvidia::cuda-driver-dev=${{ matrix.cuda-version }} conda-forge::ffmpeg=${{ matrix.ffmpeg-version-for-tests }} conda-forge::xorg-libxau" - name: Check env run: | ${CONDA_RUN} env From e2d2153dcd51d4e67fba427a316a95e412937c97 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Mon, 29 Sep 2025 14:50:11 -0400 Subject: [PATCH 5/7] increase CUDA atol to 3 --- test/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/utils.py b/test/utils.py index 5b6d9ea76..abe808d0f 100644 --- a/test/utils.py +++ b/test/utils.py @@ -73,7 +73,7 @@ def psnr(a, b, max_val=255) -> float: def assert_frames_equal(*args, **kwargs): if sys.platform == "linux": if args[0].device.type == "cuda": - atol = 2 + atol = 3 if get_ffmpeg_major_version() == 4: assert_tensor_close_on_at_least( args[0], args[1], percentage=95, atol=atol From 4119cef2d929e2e656f0692cdc43fee6edc5ce0d Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Mon, 29 Sep 2025 15:13:21 -0400 Subject: [PATCH 6/7] revert docs job to cuda 12.6 --- .github/workflows/docs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index ac63e9d81..7d580cef3 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -55,7 +55,7 @@ jobs: # the wheel unless the label cliflow/binaries/all is present in the # PR. python-version: ['3.10'] - cuda-version: ['12.8'] + cuda-version: ['12.6'] ffmpeg-version-for-tests: ['7'] container: image: "pytorch/manylinux2_28-builder:cuda${{ matrix.cuda-version }}" From d84bef10e3b863fdd965a5711deb7f74cda2c049 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Mon, 29 Sep 2025 15:29:26 -0400 Subject: [PATCH 7/7] increase atol in test_full_and_studio_range_bt709_video --- test/test_decoders.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_decoders.py b/test/test_decoders.py index e68e4fe6e..ea4e91600 100644 --- a/test/test_decoders.py +++ b/test/test_decoders.py @@ -1224,7 +1224,7 @@ def test_full_and_studio_range_bt709_video(self, asset): cpu_frame = decoder_cpu.get_frame_at(frame_index).data if cuda_version_used_for_building_torch() >= (12, 9): - torch.testing.assert_close(gpu_frame, cpu_frame, rtol=0, atol=2) + torch.testing.assert_close(gpu_frame, cpu_frame, rtol=0, atol=3) elif cuda_version_used_for_building_torch() == (12, 8): assert psnr(gpu_frame, cpu_frame) > 20