Commit eb1e929

Merge branch 'develop' of https://github.com/Center-for-AI-Innovation/llm-inference into hf_download

2 parents 4de3563 + 6ec69cf

27 files changed: +4547 −2624 lines

.github/workflows/code_checks.yml

Lines changed: 2 additions & 2 deletions

@@ -30,7 +30,7 @@ jobs:
     steps:
       - uses: actions/[email protected]
       - name: Install uv
-        uses: astral-sh/setup-uv@v6
+        uses: astral-sh/setup-uv@v7
         with:
           # Install a specific version of uv.
           version: "0.5.21"
@@ -40,7 +40,7 @@ jobs:
         with:
          python-version-file: ".python-version"
       - name: Install the project
-        run: uv sync --dev
+        run: uv sync --dev --prerelease=allow
       - name: Install dependencies and check code
         run: |
           source .venv/bin/activate

.github/workflows/docker.yml

Lines changed: 7 additions & 2 deletions

@@ -21,7 +21,9 @@ on:
 jobs:
   push_to_registry:
     name: Push Docker image to Docker Hub
-    runs-on: ubuntu-latest
+    runs-on:
+      - self-hosted
+      - docker
     steps:
       - name: Checkout repository
         uses: actions/[email protected]
@@ -32,6 +34,9 @@ jobs:
           VERSION=$(grep -A 1 'name = "vllm"' uv.lock | grep version | cut -d '"' -f 2)
           echo "version=$VERSION" >> $GITHUB_OUTPUT

+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
       - name: Log in to Docker Hub
         uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef
         with:
@@ -40,7 +45,7 @@ jobs:

       - name: Extract metadata (tags, labels) for Docker
         id: meta
-        uses: docker/metadata-action@c1e51972afc2121e065aed6d45c65596fe445f3f
+        uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893
         with:
           images: vectorinstitute/vector-inference
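The version step above scrapes the pinned vLLM version out of `uv.lock` with a grep/cut pipeline. A minimal sketch of that extraction against a fabricated lock-file fragment (the sample path and entry below are illustrative, not the repository's real lock file):

```shell
# Fabricated uv.lock fragment -- a real lock file carries many more fields.
cat > /tmp/uv.lock.sample <<'EOF'
[[package]]
name = "vllm"
version = "0.11.0"
source = { registry = "https://pypi.org/simple" }
EOF

# Same pipeline as the workflow: take the line after the package-name match,
# keep the version line, and cut out the value between the quotes.
VERSION=$(grep -A 1 'name = "vllm"' /tmp/uv.lock.sample | grep version | cut -d '"' -f 2)
echo "$VERSION"   # prints 0.11.0
```

Note the pipeline assumes `version` sits on the line immediately after `name`; a reordered lock entry would silently break it.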

.github/workflows/docs.yml

Lines changed: 7 additions & 7 deletions

@@ -56,7 +56,7 @@ jobs:
           fetch-depth: 0 # Fetch all history for proper versioning

       - name: Install uv
-        uses: astral-sh/setup-uv@v6
+        uses: astral-sh/setup-uv@v7
         with:
           version: "0.5.21"
           enable-cache: true
@@ -67,16 +67,16 @@ jobs:
           python-version-file: ".python-version"

       - name: Install the project
-        run: uv sync --all-extras --group docs
+        run: uv sync --all-extras --group docs --prerelease=allow

       - name: Build docs
-        run: uv run mkdocs build
+        run: uv run --frozen mkdocs build

       - name: Create .nojekyll file
         run: touch site/.nojekyll

       - name: Upload artifact
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v5
         with:
           name: docs-site
           path: site/
@@ -93,7 +93,7 @@ jobs:
           fetch-depth: 0 # Fetch all history for proper versioning

       - name: Install uv
-        uses: astral-sh/setup-uv@v6
+        uses: astral-sh/setup-uv@v7
         with:
           version: "0.5.21"
           enable-cache: true
@@ -104,15 +104,15 @@ jobs:
           python-version-file: ".python-version"

       - name: Install the project
-        run: uv sync --all-extras --group docs
+        run: uv sync --all-extras --group docs --frozen

       - name: Configure Git Credentials
         run: |
           git config user.name github-actions[bot]
           git config user.email 41898282+github-actions[bot]@users.noreply.github.com

       - name: Download artifact
-        uses: actions/download-artifact@v5
+        uses: actions/download-artifact@v6
         with:
           name: docs-site
           path: site

.github/workflows/publish.yml

Lines changed: 1 addition & 1 deletion

@@ -16,7 +16,7 @@ jobs:
       - uses: actions/[email protected]

       - name: Install uv
-        uses: astral-sh/setup-uv@v6
+        uses: astral-sh/setup-uv@v7
         with:
           version: "0.6.6"
           enable-cache: true

.github/workflows/unit_tests.yml

Lines changed: 5 additions & 5 deletions

@@ -46,7 +46,7 @@ jobs:
       - uses: actions/[email protected]

       - name: Install uv
-        uses: astral-sh/setup-uv@v6
+        uses: astral-sh/setup-uv@v7
         with:
           # Install a specific version of uv.
           version: "0.5.21"
@@ -58,18 +58,18 @@ jobs:
           python-version: ${{ matrix.python-version }}

       - name: Install the project
-        run: uv sync --dev
+        run: uv sync --dev --prerelease=allow

       - name: Install dependencies and check code
         run: |
-          uv run pytest -m "not integration_test" --cov vec_inf --cov-report=xml tests
+          uv run --frozen pytest -m "not integration_test" --cov vec_inf --cov-report=xml tests

       - name: Install the core package only
         run: uv sync --no-dev

       - name: Run package import tests
         run: |
-          uv run pytest tests/test_imports.py
+          uv run --frozen pytest tests/test_imports.py

       - name: Import Codecov GPG public key
         run: |
@@ -79,7 +79,7 @@ jobs:
         uses: codecov/[email protected]
         with:
           token: ${{ secrets.CODECOV_TOKEN }}
-          file: ./coverage.xml
+          files: ./coverage.xml
           name: codecov-umbrella
           fail_ci_if_error: true
           verbose: true
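The test step selects tests with `pytest -m "not integration_test"`, a boolean expression evaluated over each test's markers. A toy model of that selection logic (hypothetical helper, not pytest's implementation):

```python
import re


def matches(marker_expr: str, markers: set[str]) -> bool:
    """Toy model of pytest's -m marker selection (illustrative only).

    Each marker name in the expression evaluates to whether the test
    carries that marker; `and`, `or`, and `not` keep their Python meaning.
    """
    names = re.findall(r"\b(?!and\b|or\b|not\b)\w+\b", marker_expr)
    env = {name: name in markers for name in names}
    return bool(eval(marker_expr, {"__builtins__": {}}, env))


# A plain unit test is selected; an integration test is filtered out.
matches("not integration_test", {"unit"})              # True
matches("not integration_test", {"integration_test"})  # False
```

Real pytest also handles parenthesised expressions and keyword arguments on markers; this sketch only covers the bare-name case used in the workflow.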

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion

@@ -17,7 +17,7 @@ repos:
       - id: check-toml

   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: 'v0.13.2'
+    rev: 'v0.14.4'
     hooks:
       - id: ruff
         args: [--fix, --exit-non-zero-on-fix]

Dockerfile

Lines changed: 12 additions & 8 deletions

@@ -35,29 +35,33 @@ RUN wget https://bootstrap.pypa.io/get-pip.py && \
     rm get-pip.py && \
     python3.10 -m pip install --upgrade pip setuptools wheel uv

-# Install Infiniband/RDMA support
+# Install RDMA support
 RUN apt-get update && apt-get install -y \
     libibverbs1 libibverbs-dev ibverbs-utils \
     librdmacm1 librdmacm-dev rdmacm-utils \
+    rdma-core ibverbs-providers infiniband-diags perftest \
     && rm -rf /var/lib/apt/lists/*

 # Set up RDMA environment (these will persist in the final container)
 ENV LD_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH"
-ENV UCX_NET_DEVICES=all
 ENV NCCL_IB_DISABLE=0
+ENV NCCL_SOCKET_IFNAME="^lo,docker0"
+ENV NCCL_NET_GDR_LEVEL=PHB
+ENV NCCL_IB_TIMEOUT=22
+ENV NCCL_IB_RETRY_CNT=7
+ENV NCCL_DEBUG=INFO

 # Set up project
 WORKDIR /vec-inf
 COPY . /vec-inf

 # Install project dependencies with build requirements
-RUN PIP_INDEX_URL="https://download.pytorch.org/whl/cu128" uv pip install --system -e .[dev]
+RUN uv pip install --system -e .[dev] --prerelease=allow

-# Final configuration
-RUN mkdir -p /vec-inf/nccl && \
-    mv /root/.config/vllm/nccl/cu12/libnccl.so.2.18.1 /vec-inf/nccl/libnccl.so.2.18.1
-ENV VLLM_NCCL_SO_PATH=/vec-inf/nccl/libnccl.so.2.18.1
-ENV NCCL_DEBUG=INFO
+# Install a single, system NCCL (from NVIDIA CUDA repo in base image)
+RUN apt-get update && apt-get install -y --allow-change-held-packages \
+    libnccl2 libnccl-dev \
+    && rm -rf /var/lib/apt/lists/*

 # Set the default command to start an interactive shell
 CMD ["bash"]
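The new `NCCL_SOCKET_IFNAME="^lo,docker0"` line keeps NCCL's bootstrap traffic off the loopback and default Docker bridge interfaces: a leading `^` turns the list into an exclusion of interface-name prefixes. A sketch of that matching behaviour (hypothetical helper; NCCL's actual matching lives in its C sources):

```python
def usable_ifaces(setting: str, ifaces: list[str]) -> list[str]:
    """Toy model of NCCL_SOCKET_IFNAME interface filtering.

    With a leading '^', interfaces whose names start with any listed
    prefix are excluded; without it, the list selects by prefix.
    """
    exclude = setting.startswith("^")
    prefixes = setting.lstrip("^").split(",")

    def hit(name: str) -> bool:
        return any(name.startswith(p) for p in prefixes)

    return [i for i in ifaces if hit(i) != exclude]


# The Dockerfile's setting skips loopback and the docker bridge while
# leaving real NICs (hypothetical names here) eligible.
usable_ifaces("^lo,docker0", ["lo", "docker0", "eth0", "ib0"])  # ['eth0', 'ib0']
```

Excluding `docker0` matters in containers because NCCL picking the bridge interface for rendezvous traffic can stall multi-node startup.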

MODEL_TRACKING.md

Lines changed: 8 additions & 2 deletions

@@ -40,6 +40,7 @@ This document tracks all model weights available in the `/model-weights` directo
 | `gemma-2b-it` ||
 | `gemma-7b` ||
 | `gemma-7b-it` ||
+| `gemma-2-2b-it` ||
 | `gemma-2-9b` ||
 | `gemma-2-9b-it` ||
 | `gemma-2-27b` ||
@@ -165,8 +166,8 @@ This document tracks all model weights available in the `/model-weights` directo
 | Model | Configuration |
 |:------|:-------------|
 | `Qwen3-14B` ||
-| `Qwen3-8B` | |
-| `Qwen3-32B` | |
+| `Qwen3-8B` | |
+| `Qwen3-32B` | |
 | `Qwen3-235B-A22B` ||
 | `Qwen3-Embedding-8B` ||

@@ -186,6 +187,11 @@ This document tracks all model weights available in the `/model-weights` directo
 | `DeepSeek-Coder-V2-Lite-Instruct` ||
 | `deepseek-math-7b-instruct` ||

+### OpenAI: GPT-OSS
+| Model | Configuration |
+|:------|:-------------|
+| `gpt-oss-120b` ||
+
 ### Other LLM Models
 | Model | Configuration |
 |:------|:-------------|

README.md

Lines changed: 3 additions & 3 deletions

@@ -7,7 +7,7 @@
 [![code checks](https://github.com/VectorInstitute/vector-inference/actions/workflows/code_checks.yml/badge.svg)](https://github.com/VectorInstitute/vector-inference/actions/workflows/code_checks.yml)
 [![docs](https://github.com/VectorInstitute/vector-inference/actions/workflows/docs.yml/badge.svg)](https://github.com/VectorInstitute/vector-inference/actions/workflows/docs.yml)
 [![codecov](https://codecov.io/github/VectorInstitute/vector-inference/branch/main/graph/badge.svg?token=NI88QSIGAC)](https://app.codecov.io/github/VectorInstitute/vector-inference/tree/main)
-[![vLLM](https://img.shields.io/badge/vLLM-0.10.1.1-blue)](https://docs.vllm.ai/en/v0.10.1.1/)
+[![vLLM](https://img.shields.io/badge/vLLM-0.11.0-blue)](https://docs.vllm.ai/en/v0.11.0/)
 ![GitHub License](https://img.shields.io/github/license/VectorInstitute/vector-inference)

 This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using [vLLM](https://docs.vllm.ai/en/latest/). **This package runs natively on the Vector Institute cluster environments**. To adapt to other environments, follow the instructions in [Installation](#installation).
@@ -20,7 +20,7 @@ If you are using the Vector cluster environment, and you don't need any customiz
 ```bash
 pip install vec-inf
 ```
-Otherwise, we recommend using the provided [`Dockerfile`](Dockerfile) to set up your own environment with the package. The latest image has `vLLM` version `0.10.1.1`.
+Otherwise, we recommend using the provided [`Dockerfile`](Dockerfile) to set up your own environment with the package. The latest image has `vLLM` version `0.11.0`.

 If you'd like to use `vec-inf` on your own Slurm cluster, you would need to update the configuration files, there are 3 ways to do it:
 * Clone the repository and update the `environment.yaml` and the `models.yaml` file in [`vec_inf/config`](vec_inf/config/), then install from source by running `pip install .`.
@@ -53,7 +53,7 @@ Models that are already supported by `vec-inf` would be launched using the cache
 #### Other commands

 * `batch-launch`: Launch multiple model inference servers at once, currently ONLY single node models supported,
-* `status`: Check the model status by providing its Slurm job ID.
+* `status`: Check the status of all `vec-inf` jobs, or a specific job by providing its job ID.
 * `metrics`: Streams performance metrics to the console.
 * `shutdown`: Shutdown a model by providing its Slurm job ID.
 * `list`: List all available model names, or view the default/cached configuration of a specific model.

docs/index.md

Lines changed: 1 addition & 1 deletion

@@ -12,7 +12,7 @@ If you are using the Vector cluster environment, and you don't need any customiz
 pip install vec-inf
 ```

-Otherwise, we recommend using the provided [`Dockerfile`](https://github.com/VectorInstitute/vector-inference/blob/main/Dockerfile) to set up your own environment with the package. The latest image has `vLLM` version `0.10.1.1`.
+Otherwise, we recommend using the provided [`Dockerfile`](https://github.com/VectorInstitute/vector-inference/blob/main/Dockerfile) to set up your own environment with the package. The latest image has `vLLM` version `0.11.0`.

 If you'd like to use `vec-inf` on your own Slurm cluster, you would need to update the configuration files, there are 3 ways to do it:
 * Clone the repository and update the `environment.yaml` and the `models.yaml` file in [`vec_inf/config`](https://github.com/VectorInstitute/vector-inference/blob/main/vec_inf/config), then install from source by running `pip install .`.
