Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
157 changes: 44 additions & 113 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,55 +19,21 @@ jobs:
finalize:
  # Gate job: waits for every job listed under `needs`, prints each job's
  # result, and fails when any of them did not finish with 'success'.
  timeout-minutes: 10
  needs:
    - unit-tests
    - example-pull-gcr
    - test-htc
  # Important: the next line MUST be `if: always()`.
  # Do not change that line.
  # That line is necessary to make sure that this job runs even if tests fail.
  if: always()
  runs-on: ubuntu-latest
  steps:
    - run: |
        echo unit-tests: ${{ needs.unit-tests.result }}
        echo example-pull-gcr: ${{ needs.example-pull-gcr.result }}
        echo test-htc: ${{ needs.test-htc.result }}
    - run: exit 1
      # Every job listed under `needs` must have a clause below; a job that is
      # missing here has its result printed above but cannot fail this job.
      # The last line must NOT end with ||
      # All other lines MUST end with ||
      if: |
        (needs.unit-tests.result != 'success') ||
        (needs.example-pull-gcr.result != 'success') ||
        (needs.test-htc.result != 'success')
unit-tests:
  # Runs the package tests once per Julia version in the matrix, then
  # processes the coverage output and uploads `lcov.info` to Codecov.
  timeout-minutes: 20
  runs-on: ubuntu-latest
  strategy:
    # Keep the remaining matrix entries running when one entry fails.
    fail-fast: false
    matrix:
      version:
        - "1.2"   # minimum Julia version supported in Project.toml
        - "1.6"   # previous LTS
        - "1.10"  # current LTS
        - "1"     # automatically expands to the latest stable 1.x release of Julia
  steps:
    - uses: actions/checkout@v4
      with:
        persist-credentials: false
    - uses: julia-actions/setup-julia@v2
      with:
        version: ${{ matrix.version }}
    - uses: julia-actions/julia-runtest@v1
    # List any *.cov files present in the workspace (debugging aid).
    - run: find . -type f -name '*.cov'
    # - run: find . -type f -name '*.cov' -exec cat {} \;
    - uses: julia-actions/julia-processcoverage@v1
    - uses: codecov/codecov-action@v5
      with:
        token: ${{ secrets.CODECOV_TOKEN }}
        files: lcov.info
        # Fail CI on a Codecov upload error unless this run is a pull request
        # whose head repository differs from this repository (i.e. a fork).
        # Non-PR runs and same-repository PRs therefore DO fail on upload errors.
        fail_ci_if_error: ${{ github.event_name != 'pull_request' || github.repository == github.event.pull_request.head.repo.full_name }}
test-slurm:
if: false
(needs.test-htc.result != 'success')
test-htc:
runs-on: ubuntu-latest
timeout-minutes: 20
strategy:
Expand All @@ -76,82 +42,47 @@ jobs:
version:
# Please note: You must specify the full Julia version number (major.minor.patch).
# This is because the value here will be directly interpolated into a download URL.
# - '1.2.0' # minimum Julia version supported in Project.toml
- '1.0.5' # minimum Julia version supported in Project.toml
- '1.6.7' # previous LTS
- '1.10.7' # current LTS
- '1.11.2' # currently the latest stable release
steps:
# Steps: build and start the Slurm-in-Docker cluster, run the test suite in
# three ways inside the `slurmctld` container, then upload coverage.
- uses: actions/checkout@v4
  with:
    persist-credentials: false
- name: Print Docker version
  run: |
    docker --version
    docker version
# This next bit of code is taken from:
# https://github.com/kleinhenz/SlurmClusterManager.jl
# Original author: Joseph Kleinhenz
# License: MIT
- name: Setup Slurm inside Docker
  env:
    # Passed through the environment so the shell's `${VAR:?}` check can
    # abort the step when the matrix value is missing or empty.
    MATRIX_JULIA_VERSION: ${{matrix.version}}
  run: |
    docker version
    docker compose version
    docker build --build-arg "JULIA_VERSION=${MATRIX_JULIA_VERSION:?}" -t slurm-cluster-julia -f ci/Dockerfile .
    docker compose -f ci/docker-compose.yml up -d
    docker ps
- name: Print some information for debugging purposes
  run: |
    docker exec -t slurmctld pwd
    docker exec -t slurmctld ls -la
    docker exec -t slurmctld ls -la HTCondorClusterManager
- name: Instantiate package
  run: docker exec -t slurmctld julia --project=HTCondorClusterManager -e 'import Pkg; @show Base.active_project(); Pkg.instantiate(); Pkg.status()'
# The suite is run three times: directly, under `salloc`, and via the
# `run_my_sbatch.sh` helper script.
- name: Run tests without a Slurm allocation
  run: docker exec -t slurmctld julia --project=HTCondorClusterManager -e 'import Pkg; Pkg.test(; test_args=["slurm"])'
- name: Run tests inside salloc
  run: docker exec -t slurmctld salloc -t 00:10:00 -n 2 julia --project=HTCondorClusterManager -e 'import Pkg; Pkg.test(; test_args=["slurm"], coverage=true)'
- name: Run tests inside sbatch
  run: docker exec -t slurmctld HTCondorClusterManager/ci/run_my_sbatch.sh
# List *.cov files on the runner before and after copying them out of the
# container, so the effect of the copy is visible in the log.
- run: find . -type f -name '*.cov'
- name: Copy .cov files out of the Docker container
  run: docker exec slurmctld /bin/bash -c 'cd /home/docker/HTCondorClusterManager && tar -cf - src/*.cov' | tar -xvf -
- run: find . -type f -name '*.cov'
# - run: find . -type f -name '*.cov' -exec cat {} \;
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v5
  with:
    token: ${{ secrets.CODECOV_TOKEN }}
    files: lcov.info
    # Fail CI on a Codecov upload error unless this run is a pull request
    # whose head repository differs from this repository (i.e. a fork).
    # Non-PR runs and same-repository PRs therefore DO fail on upload errors.
    fail_ci_if_error: ${{ github.event_name != 'pull_request' || github.repository == github.event.pull_request.head.repo.full_name }}
example-pull-gcr:
  # Pulls one container image from ghcr.io by digest and then lists the
  # containers and images known to the local Docker daemon.
  timeout-minutes: 20
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
      with:
        persist-credentials: false
    - name: Print Docker version
      run: |
        docker --version
        docker version
    # - uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0
    #   if: false
    #   with:
    #     registry: ghcr.io
    #     username: ${{ github.actor }}
    #     password: ${{ secrets.GITHUB_TOKEN }}
    - name: Docker pull
      # NOTE(review): MY_GCR_NAMESPACE and MY_GCR_IMAGENAME are not set in this
      # step's `env`; presumably they come from a workflow-level `env` that is
      # not visible here - confirm, because `${VAR:?}` aborts the step when
      # the variable is unset or empty.
      env:
        # We intentionally pull by full-length digest (NOT tag) for reproducibility:
        MY_DIGEST_HTCONDOR_EXECUTE: 'sha256:d4384c19cdb2f26bae15b1feef3a12bde66554658562df2626c03ae870003555' # htcondor-execute
      run: |
        docker pull "ghcr.io/${MY_GCR_NAMESPACE:?}/${MY_GCR_IMAGENAME:?}@${MY_DIGEST_HTCONDOR_EXECUTE:?}"
    - name: List images
      run: |
        docker ps -a
        docker images
# Steps: build the HTCondor compose services for the matrix Julia version,
# start the cluster, run the package tests in the `submit` service, then
# copy coverage files out of `execute1` and upload them to Codecov.
- uses: actions/checkout@v4
  with:
    persist-credentials: false
- uses: julia-actions/setup-julia@v2
  with:
    version: ${{ matrix.version }}
- run: docker version
- run: docker compose version
# All compose commands below run from ci/htcondor.
- working-directory: ci/htcondor
  run: docker compose pull
- working-directory: ci/htcondor
  env:
    # Passed through the environment so the shell's `${VAR:?}` check can
    # abort the step when the matrix value is missing or empty.
    MATRIX_VERSION: ${{matrix.version}}
  run: docker compose build --build-arg JULIA_VERSION="${MATRIX_VERSION:?}"
- working-directory: ci/htcondor
  run: ./start-htcondor.sh
# Smoke checks: both `condor_submit` and `julia` must be runnable as
# `submituser` inside the `submit` service.
- working-directory: ci/htcondor
  run: docker compose exec -T --user submituser submit condor_submit --help
- working-directory: ci/htcondor
  run: docker compose exec -T --user submituser submit julia --version
# NOTE(review): the project path here is /SlurmClusterManager, while the
# coverage-copy step below reads ~/HTCondorClusterManager - confirm which
# directory the image actually contains and make the two agree.
- working-directory: ci/htcondor
  run: docker compose exec -T --user submituser submit julia --project=/SlurmClusterManager -e 'import Pkg; Pkg.test()'
# List *.cov files on the runner before and after the copy step, so the
# effect of the copy is visible in the log.
- run: find . -type f -name '*.cov'
# NOTE(review): this step sets no `working-directory`, unlike the other
# `docker compose exec` steps - confirm compose can locate its project file
# from the default directory.
- name: Copy .cov files out of the Docker container
  run: |
    # docker compose exec -T --user submituser submit /bin/bash -c 'cd ~/HTCondorClusterManager && tar -cf - src/*.cov' | tar -xvf -
    docker compose exec -T --user submituser execute1 /bin/bash -c 'cd ~/HTCondorClusterManager && tar -cf - src/*.cov' | tar -xvf -
    # docker compose exec -T --user submituser execute2 /bin/bash -c 'cd ~/HTCondorClusterManager && tar -cf - src/*.cov' | tar -xvf -
- run: find . -type f -name '*.cov'
# - run: find . -type f -name '*.cov' -exec cat {} \;
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v5
  with:
    token: ${{ secrets.CODECOV_TOKEN }}
    files: lcov.info
    # Fail CI on a Codecov upload error unless this run is a pull request
    # whose head repository differs from this repository (i.e. a fork).
    # Non-PR runs and same-repository PRs therefore DO fail on upload errors.
    fail_ci_if_error: ${{ github.event_name != 'pull_request' || github.repository == github.event.pull_request.head.repo.full_name }}
21 changes: 0 additions & 21 deletions ci/Dockerfile

This file was deleted.

48 changes: 0 additions & 48 deletions ci/docker-compose.yml

This file was deleted.

43 changes: 43 additions & 0 deletions ci/htcondor/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Build stage `submit`: the pinned base image plus a Julia installation and a
# copy of the build context.
#
# We intentionally use full-length digests (NOT tags) for reproducibility.
# FROM ghcr.io/juliaparallel/dask-jobqueue-ci-images:htcondor-submit as submit
FROM ghcr.io/juliaparallel/dask-jobqueue-ci-images@sha256:5ada6445b5d8b53b6693ab86be364dd1ce385ada8e53763731ba50d145f0350d AS submit

# Full Julia version (major.minor.patch) to install; it is interpolated into
# the download URL below.
# We intentionally have no default value:
ARG JULIA_VERSION

# Download and unpack the requested Julia release.
# `${JULIA_VERSION:?}` aborts the build when the build argument is missing or
# empty, `folder` is the major.minor part used in the URL path, and `curl -f`
# makes an HTTP error fail instead of piping an error page into tar.
RUN mkdir -p /home/docker/.local/opt/julia \
    && cd /home/docker/.local/opt/julia \
    && folder="$(echo "${JULIA_VERSION:?}" | cut -d. -f1-2)" \
    && curl -fL "https://julialang-s3.julialang.org/bin/linux/x64/${folder}/julia-${JULIA_VERSION}-linux-x86_64.tar.gz" | tar xz --strip 1 \
    && /home/docker/.local/opt/julia/bin/julia --version

ENV PATH="/home/docker/.local/opt/julia/bin:${PATH}"

# Print the working directory into the build log (debugging aid).
RUN bash -c "pwd"

# NOTE(review): the destination is relative to the base image's working
# directory, which is not visible here - confirm that the resulting path
# matches the project paths used by the CI workflow.
COPY --chown=docker . SlurmClusterManager
RUN chmod -R u+rw,g+rw,o+rw SlurmClusterManager

# Subsequent shell-form instructions run inside the `dask-jobqueue` conda
# environment.
SHELL ["conda", "run", "-n", "dask-jobqueue", "/bin/bash", "-c"]

# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Build stage `execute`: the pinned base image plus a Julia installation and a
# copy of the build context.
#
# We intentionally use full-length digests (NOT tags) for reproducibility.
# FROM ghcr.io/juliaparallel/dask-jobqueue-ci-images:htcondor-execute as execute
FROM ghcr.io/juliaparallel/dask-jobqueue-ci-images@sha256:5723d0380f627779bc54a31ebac9a77f0937189453f597845411257dea6ac0db AS execute

# Full Julia version (major.minor.patch) to install; it is interpolated into
# the download URL below. An ARG must be re-declared in every stage that uses it.
# We intentionally have no default value:
ARG JULIA_VERSION

# Download and unpack the requested Julia release.
# `${JULIA_VERSION:?}` aborts the build when the build argument is missing or
# empty, `folder` is the major.minor part used in the URL path, and `curl -f`
# makes an HTTP error fail instead of piping an error page into tar.
RUN mkdir -p /home/docker/.local/opt/julia \
    && cd /home/docker/.local/opt/julia \
    && folder="$(echo "${JULIA_VERSION:?}" | cut -d. -f1-2)" \
    && curl -fL "https://julialang-s3.julialang.org/bin/linux/x64/${folder}/julia-${JULIA_VERSION}-linux-x86_64.tar.gz" | tar xz --strip 1 \
    && /home/docker/.local/opt/julia/bin/julia --version

ENV PATH="/home/docker/.local/opt/julia/bin:${PATH}"

# Print the working directory into the build log (debugging aid).
RUN bash -c "pwd"

# NOTE(review): the destination is relative to the base image's working
# directory, which is not visible here - confirm that the resulting path
# matches the project paths used by the CI workflow.
COPY --chown=docker . SlurmClusterManager
RUN chmod -R u+rw,g+rw,o+rw SlurmClusterManager
77 changes: 77 additions & 0 deletions ci/htcondor/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# This file is based on:
# https://github.com/dask/dask-jobqueue/blob/main/ci/htcondor/docker-compose.yml
# License: BSD 3-Clause

# version: "3.4"

# Four services: `cm`, `submit`, `execute1` and `execute2`. All of them mount
# the `secrets` volume and the local condor_config.local; `submit` and the two
# `execute` services additionally share the `shared_space` volume.
services:
  cm:
    hostname: cm.htcondor
    # We intentionally use full-length digests (NOT tags) for reproducibility.
    #
    # TODO: mirror this image in our own GCR, instead of needing it to exist in Docker Hub.
    # image: htcondor/cm:el7
    image: htcondor/cm@sha256:71cfed5ffc1dc78cb725f571e6be6acdb50ca0322c9cc9cd500a965be7e402c6
    environment:
      # Quoted so the value stays the string "yes" rather than a YAML boolean.
      USE_POOL_PASSWORD: "yes"
    volumes:
      - secrets:/root/secrets
      - ./condor_config.local:/etc/condor/condor_config.local
    # Write the pool password file into the `secrets` volume, then replace the
    # shell with the image's /start.sh (traced via `bash -x`).
    command: bash -c 'condor_store_cred -p password -f /root/secrets/pool_password ; exec bash -x /start.sh'

  submit:
    hostname: submit.htcondor
    # image: ghcr.io/juliaparallel/dask-jobqueue-ci-images@sha256:5ada6445b5d8b53b6693ab86be364dd1ce385ada8e53763731ba50d145f0350d
    build:
      # The build context is the repository root, two levels above this file.
      context: ../..
      dockerfile: ci/htcondor/Dockerfile
      target: submit
    depends_on:
      - cm
    environment:
      CONDOR_HOST: cm
      USE_POOL_PASSWORD: "yes"
      CI_SHARED_SPACE: /shared_space
    volumes:
      - secrets:/root/secrets
      # NOTE(review): the /dask-jobqueue mount point looks inherited from the
      # upstream file - confirm whether anything in this project still reads it.
      - ../..:/dask-jobqueue
      - ./condor_config.local:/etc/condor/condor_config.local
      - shared_space:/shared_space

  execute1:
    hostname: execute1.htcondor
    # image: ghcr.io/juliaparallel/dask-jobqueue-ci-images@sha256:5723d0380f627779bc54a31ebac9a77f0937189453f597845411257dea6ac0db
    build:
      context: ../..
      dockerfile: ci/htcondor/Dockerfile
      target: execute
    depends_on:
      - cm
    environment:
      CONDOR_HOST: cm
      USE_POOL_PASSWORD: "yes"
    volumes:
      - secrets:/root/secrets
      - ./condor_config.local:/etc/condor/condor_config.local
      - shared_space:/shared_space

  # Same definition as `execute1` apart from the hostname.
  execute2:
    hostname: execute2.htcondor
    # image: ghcr.io/juliaparallel/dask-jobqueue-ci-images@sha256:5723d0380f627779bc54a31ebac9a77f0937189453f597845411257dea6ac0db
    build:
      context: ../..
      dockerfile: ci/htcondor/Dockerfile
      target: execute
    depends_on:
      - cm
    environment:
      CONDOR_HOST: cm
      USE_POOL_PASSWORD: "yes"
    volumes:
      - secrets:/root/secrets
      - ./condor_config.local:/etc/condor/condor_config.local
      - shared_space:/shared_space

# Named volumes with default settings.
volumes:
  secrets: {}
  shared_space: {}
14 changes: 14 additions & 0 deletions ci/htcondor/start-htcondor.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/bash

# This file is based on:
# https://github.com/dask/dask-jobqueue/blob/main/ci/htcondor/docker-compose.yml
# License: BSD 3-Clause
#
# NOTE(review): the URL above names docker-compose.yml; presumably the
# upstream start script in the same directory was meant - confirm.
#
# Starts the compose services (without building) and then polls
# `condor_status -af activity` inside the `submit` service until exactly two
# lines contain `Idle`.
#
# Environment:
#   HTCONDOR_START_TIMEOUT  Maximum number of seconds to wait for the cluster
#                           (default: 600).
#
# Exit status: 0 once the cluster is ready; 1 if the services fail to start
# or the timeout expires.

# Treat references to unset variables as errors.
set -u

timeout_seconds="${HTCONDOR_START_TIMEOUT:-600}"
expected_idle=2

# Without this check a failed start would leave the loop below polling a
# cluster that never comes up.
if ! docker compose up -d --no-build; then
    echo "docker compose up failed" >&2
    exit 1
fi

# SECONDS is maintained by bash and counts seconds since the shell started.
deadline=$((SECONDS + timeout_seconds))

while true; do
    # `grep -c` prints the number of matching lines (0 when there are none),
    # which is what `grep Idle | wc -l` computed.
    idle_count="$(docker compose exec -T submit condor_status -af activity | grep -c Idle)"
    if [ "${idle_count:-0}" -eq "${expected_idle}" ]; then
        break
    fi
    if [ "${SECONDS}" -ge "${deadline}" ]; then
        echo "Timed out after ${timeout_seconds}s waiting for HTCondor (Idle count: ${idle_count:-0})" >&2
        exit 1
    fi
    echo "Waiting for cluster to become ready"
    sleep 2
done
echo "HTCondor properly configured"
Loading
Loading