Skip to content

Commit 57003ef

Browse files
authored
Migrate vLLM Ray Serve Container (#5463)
1 parent 66c6091 commit 57003ef

File tree

12 files changed

+967
-80
lines changed

12 files changed

+967
-80
lines changed

.github/scripts/runner_setup.sh

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
#!/bin/bash
22
set -e
33

4-
curl -LsSf https://astral.sh/uv/install.sh | UV_INSTALL_DIR="/usr/local/bin" sh
5-
uv self update
4+
if ! command -v uv &> /dev/null; then
5+
curl -LsSf https://astral.sh/uv/install.sh | UV_INSTALL_DIR="/usr/local/bin" sh
6+
uv self update
7+
fi
68
docker --version

.github/workflows/pr-example.yml

Lines changed: 0 additions & 65 deletions
This file was deleted.
Lines changed: 322 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,322 @@
1+
name: PR - vLLM RayServe
2+
3+
on:
4+
pull_request:
5+
branches:
6+
- main
7+
paths:
8+
- "docker/**"
9+
10+
permissions:
11+
contents: read
12+
13+
concurrency:
14+
group: pr-${{ github.event.pull_request.number }}
15+
cancel-in-progress: true
16+
17+
jobs:
18+
check-changes:
19+
runs-on: ubuntu-latest
20+
outputs:
21+
vllm-rayserve-ec2: ${{ steps.changes.outputs.vllm-rayserve-ec2 }}
22+
steps:
23+
- uses: actions/checkout@v5
24+
- uses: actions/setup-python@v6
25+
with:
26+
python-version: "3.12"
27+
- uses: pre-commit/[email protected]
28+
with:
29+
extra_args: --all-files
30+
- name: Detect file changes
31+
id: changes
32+
uses: dorny/paths-filter@v3
33+
with:
34+
filters: |
35+
vllm-rayserve-ec2:
36+
- "docker/vllm/Dockerfile.rayserve"
37+
38+
build-image:
39+
needs: [check-changes]
40+
if: needs.check-changes.outputs.vllm-rayserve-ec2 == 'true'
41+
runs-on:
42+
- codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
43+
fleet:x86-build-runner
44+
steps:
45+
- uses: actions/checkout@v5
46+
- run: .github/scripts/runner_setup.sh
47+
- run: .github/scripts/buildkitd.sh
48+
- name: ECR login
49+
run: |
50+
aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com
51+
52+
- name: Resolve image URI for build
53+
run: |
54+
IMAGE_URI=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com/ci:vllm-0.10.2-gpu-py312-cu128-ubuntu22.04-rayserve-ec2-pr-${{ github.event.pull_request.number }}
55+
echo "Image URI to build: $IMAGE_URI"
56+
echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV
57+
58+
- name: Build image
59+
run: |
60+
docker buildx build --progress plain \
61+
--build-arg CACHE_REFRESH="$(date +"%Y-%m-%d")" \
62+
--cache-to=type=inline \
63+
--cache-from=type=registry,ref=$IMAGE_URI \
64+
--tag $IMAGE_URI \
65+
--target vllm-rayserve-ec2 \
66+
-f docker/vllm/Dockerfile.rayserve .
67+
68+
- name: Docker Push and save image URI artifact
69+
run: |
70+
docker push $IMAGE_URI
71+
docker rmi $IMAGE_URI
72+
echo $IMAGE_URI > image_uri.txt
73+
74+
- name: Upload image URI artifact
75+
uses: actions/upload-artifact@v4
76+
with:
77+
name: vllm-rayserve-ec2-image-uri
78+
path: image_uri.txt
79+
80+
regression-test:
81+
needs: [build-image]
82+
if: needs.build-image.result == 'success'
83+
runs-on:
84+
- codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
85+
fleet:x86-g6xl-runner
86+
steps:
87+
- name: Checkout DLC source
88+
uses: actions/checkout@v5
89+
90+
- name: ECR login
91+
run: |
92+
aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com
93+
94+
- name: Download image URI artifact
95+
uses: actions/download-artifact@v4
96+
with:
97+
name: vllm-rayserve-ec2-image-uri
98+
99+
- name: Resolve image URI for test
100+
run: |
101+
IMAGE_URI=$(cat image_uri.txt)
102+
echo "Resolved image URI: $IMAGE_URI"
103+
echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV
104+
105+
- name: Pull image
106+
run: |
107+
docker pull $IMAGE_URI
108+
109+
- name: Checkout vLLM Tests
110+
uses: actions/checkout@v5
111+
with:
112+
repository: vllm-project/vllm
113+
ref: v0.10.2
114+
path: vllm_source
115+
116+
- name: Start container
117+
run: |
118+
CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
119+
-v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
120+
-v ${HOME}/.cache/vllm:/root/.cache/vllm \
121+
-v ./vllm_source:/workdir --workdir /workdir \
122+
-e HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
123+
${IMAGE_URI})
124+
echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
125+
126+
- name: Setup for vLLM Test
127+
run: |
128+
docker exec ${CONTAINER_ID} sh -c '
129+
set -eux
130+
uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
131+
uv pip install --system pytest pytest-asyncio
132+
uv pip install --system -e tests/vllm_test_utils
133+
uv pip install --system hf_transfer
134+
mkdir src
135+
mv vllm src/vllm
136+
'
137+
138+
- name: Run vLLM Tests
139+
run: |
140+
docker exec ${CONTAINER_ID} sh -c '
141+
set -eux
142+
nvidia-smi
143+
144+
# Regression Test # 7min
145+
cd /workdir/tests
146+
uv pip install --system modelscope
147+
pytest -v -s test_regression.py
148+
'
149+
150+
- name: Cleanup container and images
151+
if: always()
152+
run: |
153+
docker rm -f ${CONTAINER_ID} || true
154+
docker image prune -a --force --filter "until=24h"
155+
docker system df
156+
157+
cuda-test:
158+
needs: [build-image]
159+
if: needs.build-image.result == 'success'
160+
runs-on:
161+
- codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
162+
fleet:x86-g6xl-runner
163+
steps:
164+
- name: Checkout DLC source
165+
uses: actions/checkout@v5
166+
167+
- name: ECR login
168+
run: |
169+
aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com
170+
171+
- name: Download image URI artifact
172+
uses: actions/download-artifact@v4
173+
with:
174+
name: vllm-rayserve-ec2-image-uri
175+
176+
- name: Resolve image URI for test
177+
run: |
178+
IMAGE_URI=$(cat image_uri.txt)
179+
echo "Resolved image URI: $IMAGE_URI"
180+
echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV
181+
182+
- name: Pull image
183+
run: |
184+
docker pull $IMAGE_URI
185+
186+
- name: Checkout vLLM Tests
187+
uses: actions/checkout@v5
188+
with:
189+
repository: vllm-project/vllm
190+
ref: v0.10.2
191+
path: vllm_source
192+
193+
- name: Start container
194+
run: |
195+
CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
196+
-v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
197+
-v ${HOME}/.cache/vllm:/root/.cache/vllm \
198+
-v ./vllm_source:/workdir --workdir /workdir \
199+
-e HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
200+
${IMAGE_URI})
201+
echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
202+
203+
- name: Setup for vLLM Test
204+
run: |
205+
docker exec ${CONTAINER_ID} sh -c '
206+
set -eux
207+
uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
208+
uv pip install --system pytest pytest-asyncio
209+
uv pip install --system -e tests/vllm_test_utils
210+
uv pip install --system hf_transfer
211+
mkdir src
212+
mv vllm src/vllm
213+
'
214+
215+
- name: Run vLLM Tests
216+
run: |
217+
docker exec ${CONTAINER_ID} sh -c '
218+
set -eux
219+
nvidia-smi
220+
221+
# Platform Tests (CUDA) # 4min
222+
cd /workdir/tests
223+
pytest -v -s cuda/test_cuda_context.py
224+
'
225+
226+
- name: Cleanup container and images
227+
if: always()
228+
run: |
229+
docker rm -f ${CONTAINER_ID} || true
230+
docker image prune -a --force --filter "until=24h"
231+
docker system df
232+
233+
example-test:
234+
needs: [build-image]
235+
if: needs.build-image.result == 'success'
236+
runs-on:
237+
- codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
238+
fleet:x86-g6xl-runner
239+
steps:
240+
- name: Checkout DLC source
241+
uses: actions/checkout@v5
242+
243+
- name: ECR login
244+
run: |
245+
aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_REGION }}.amazonaws.com
246+
247+
- name: Download image URI artifact
248+
uses: actions/download-artifact@v4
249+
with:
250+
name: vllm-rayserve-ec2-image-uri
251+
252+
- name: Resolve image URI for test
253+
run: |
254+
IMAGE_URI=$(cat image_uri.txt)
255+
echo "Resolved image URI: $IMAGE_URI"
256+
echo "IMAGE_URI=$IMAGE_URI" >> $GITHUB_ENV
257+
258+
- name: Pull image
259+
run: |
260+
docker pull $IMAGE_URI
261+
262+
- name: Checkout vLLM Tests
263+
uses: actions/checkout@v5
264+
with:
265+
repository: vllm-project/vllm
266+
ref: v0.10.2
267+
path: vllm_source
268+
269+
- name: Start container
270+
run: |
271+
CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
272+
-v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
273+
-v ${HOME}/.cache/vllm:/root/.cache/vllm \
274+
-v ./vllm_source:/workdir --workdir /workdir \
275+
-e HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
276+
${IMAGE_URI})
277+
echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
278+
279+
- name: Setup for vLLM Test
280+
run: |
281+
docker exec ${CONTAINER_ID} sh -c '
282+
set -eux
283+
uv pip install --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
284+
uv pip install --system pytest pytest-asyncio
285+
uv pip install --system -e tests/vllm_test_utils
286+
uv pip install --system hf_transfer
287+
mkdir src
288+
mv vllm src/vllm
289+
'
290+
291+
- name: Run vLLM Tests
292+
run: |
293+
docker exec ${CONTAINER_ID} sh -c '
294+
set -eux
295+
nvidia-smi
296+
297+
# Examples Test # 30min
298+
cd /workdir/examples
299+
pip install tensorizer # for tensorizer test
300+
python3 offline_inference/basic/generate.py --model facebook/opt-125m
301+
# python3 offline_inference/basic/generate.py --model meta-llama/Llama-2-13b-chat-hf --cpu-offload-gb 10
302+
python3 offline_inference/basic/chat.py
303+
python3 offline_inference/prefix_caching.py
304+
python3 offline_inference/llm_engine_example.py
305+
python3 offline_inference/audio_language.py --seed 0
306+
python3 offline_inference/vision_language.py --seed 0
307+
python3 offline_inference/vision_language_pooling.py --seed 0
308+
python3 offline_inference/vision_language_multi_image.py --seed 0
309+
VLLM_USE_V1=0 python3 others/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 others/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
310+
python3 offline_inference/encoder_decoder_multimodal.py --model-type whisper --seed 0
311+
python3 offline_inference/basic/classify.py
312+
python3 offline_inference/basic/embed.py
313+
python3 offline_inference/basic/score.py
314+
VLLM_USE_V1=0 python3 offline_inference/profiling.py --model facebook/opt-125m run_num_steps --num-steps 2
315+
'
316+
317+
- name: Cleanup container and images
318+
if: always()
319+
run: |
320+
docker rm -f ${CONTAINER_ID} || true
321+
docker image prune -a --force --filter "until=24h"
322+
docker system df

0 commit comments

Comments (0)