Skip to content

Commit ea2502d

Browse files
authored
Update vLLM dependency to use SageMaker wheel, remove outdated tests (#2896)
1 parent commit: 6a21e00 — this commit: ea2502d

File tree

3 files changed

+2
-51
lines changed

3 files changed

+2
-51
lines changed

serving/docker/lmi-container-requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,6 @@ uvloop
3131
ninja
3232
peft
3333
llmcompressor
34  - vllm==0.10.2
34  + https://publish.djl.ai/sm-vllm/vllm-0.10.2-cp38-abi3-linux_x86_64.whl
3535
xgrammar==0.1.23
3636
flashinfer-python==0.2.5

tests/integration/launch_container.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,7 @@ echo "Launching ${container_id}..."
298298

299299
total_retries=24
300300
if $is_llm; then
301  - total_retries=150
301  + total_retries=60
302302
if [[ "$platform" == *"inf2"* ]]; then
303303
total_retries=160
304304
fi

tests/integration/tests.py

Lines changed: 0 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -310,18 +310,6 @@ def test_chatglm3_6b(self):
310310
r.launch("CUDA_VISIBLE_DEVICES=0,1,2,3")
311311
client.run("trtllm chatglm3-6b".split())
312312

313-
def test_gpt2(self):
314-
with Runner('tensorrt-llm', 'gpt2') as r:
315-
prepare.build_trtllm_handler_model("gpt2")
316-
r.launch("CUDA_VISIBLE_DEVICES=0,1,2,3")
317-
client.run("trtllm gpt2".split())
318-
319-
def test_santacoder(self):
320-
with Runner('tensorrt-llm', 'santacoder') as r:
321-
prepare.build_trtllm_handler_model("santacoder")
322-
r.launch("CUDA_VISIBLE_DEVICES=0,1,2,3")
323-
client.run("trtllm santacoder".split())
324-
325313
def test_llama_31_8b(self):
326314
with Runner('tensorrt-llm', 'llama-3-1-8b') as r:
327315
prepare.build_trtllm_handler_model('llama-3-1-8b')
@@ -345,12 +333,6 @@ def test_mistral(self):
345333
r.launch("CUDA_VISIBLE_DEVICES=0,1,2,3")
346334
client.run("trtllm mistral-7b".split())
347335

348-
def test_gpt_j_6b(self):
349-
with Runner('tensorrt-llm', 'gpt-j-6b') as r:
350-
prepare.build_trtllm_handler_model("gpt-j-6b")
351-
r.launch("CUDA_VISIBLE_DEVICES=0")
352-
client.run("trtllm gpt-j-6b".split())
353-
354336
def test_qwen_7b(self):
355337
with Runner('tensorrt-llm', 'qwen-7b') as r:
356338
prepare.build_trtllm_handler_model("qwen-7b")
@@ -563,31 +545,6 @@ def test_llama3_8b(self):
563545
@pytest.mark.gpu_4
564546
class TestVllm1:
565547

566-
def test_gpt_neox_20b(self):
567-
with Runner('lmi', 'gpt-neox-20b') as r:
568-
prepare.build_vllm_model("gpt-neox-20b")
569-
r.launch()
570-
client.run("vllm gpt-neox-20b".split())
571-
572-
def test_mistral_7b(self):
573-
with Runner('lmi', 'mistral-7b') as r:
574-
prepare.build_vllm_model("mistral-7b")
575-
r.launch()
576-
client.run("vllm mistral-7b".split())
577-
client.run("vllm_chat mistral-7b".split())
578-
579-
def test_phi2(self):
580-
with Runner('lmi', 'phi-2') as r:
581-
prepare.build_vllm_model("phi-2")
582-
r.launch("VLLM_USE_V1=0")
583-
client.run("vllm phi-2".split())
584-
585-
def test_starcoder2_7b(self):
586-
with Runner('lmi', 'starcoder2-7b') as r:
587-
prepare.build_vllm_model("starcoder2-7b")
588-
r.launch()
589-
client.run("vllm starcoder2-7b".split())
590-
591548
def test_gemma_2b(self):
592549
with Runner('lmi', 'gemma-2b') as r:
593550
prepare.build_vllm_model("gemma-2b")
@@ -968,12 +925,6 @@ def test_llama_vllm_nxdi_aot(self):
968925
@pytest.mark.gpu_4
969926
class TestCorrectnessTrtLlm:
970927

971-
def test_codestral_22b(self):
972-
with Runner('tensorrt-llm', 'codestral-22b') as r:
973-
prepare.build_correctness_model("trtllm-codestral-22b")
974-
r.launch("CUDA_VISIBLE_DEVICES=0,1,2,3")
975-
client.run("correctness trtllm-codestral-22b".split())
976-
977928
def test_llama3_8b(self):
978929
with Runner('tensorrt-llm', 'llama3-8b') as r:
979930
prepare.build_correctness_model("trtllm-llama3-8b")

0 commit comments

Comments (0)