@@ -310,18 +310,6 @@ def test_chatglm3_6b(self):
             r.launch("CUDA_VISIBLE_DEVICES=0,1,2,3")
             client.run("trtllm chatglm3-6b".split())
 
-    def test_gpt2(self):
-        with Runner('tensorrt-llm', 'gpt2') as r:
-            prepare.build_trtllm_handler_model("gpt2")
-            r.launch("CUDA_VISIBLE_DEVICES=0,1,2,3")
-            client.run("trtllm gpt2".split())
-
-    def test_santacoder(self):
-        with Runner('tensorrt-llm', 'santacoder') as r:
-            prepare.build_trtllm_handler_model("santacoder")
-            r.launch("CUDA_VISIBLE_DEVICES=0,1,2,3")
-            client.run("trtllm santacoder".split())
-
     def test_llama_31_8b(self):
         with Runner('tensorrt-llm', 'llama-3-1-8b') as r:
             prepare.build_trtllm_handler_model('llama-3-1-8b')
@@ -345,12 +333,6 @@ def test_mistral(self):
             r.launch("CUDA_VISIBLE_DEVICES=0,1,2,3")
             client.run("trtllm mistral-7b".split())
 
-    def test_gpt_j_6b(self):
-        with Runner('tensorrt-llm', 'gpt-j-6b') as r:
-            prepare.build_trtllm_handler_model("gpt-j-6b")
-            r.launch("CUDA_VISIBLE_DEVICES=0")
-            client.run("trtllm gpt-j-6b".split())
-
     def test_qwen_7b(self):
         with Runner('tensorrt-llm', 'qwen-7b') as r:
             prepare.build_trtllm_handler_model("qwen-7b")
@@ -563,31 +545,6 @@ def test_llama3_8b(self):
 @pytest.mark.gpu_4
 class TestVllm1:
 
-    def test_gpt_neox_20b(self):
-        with Runner('lmi', 'gpt-neox-20b') as r:
-            prepare.build_vllm_model("gpt-neox-20b")
-            r.launch()
-            client.run("vllm gpt-neox-20b".split())
-
-    def test_mistral_7b(self):
-        with Runner('lmi', 'mistral-7b') as r:
-            prepare.build_vllm_model("mistral-7b")
-            r.launch()
-            client.run("vllm mistral-7b".split())
-            client.run("vllm_chat mistral-7b".split())
-
-    def test_phi2(self):
-        with Runner('lmi', 'phi-2') as r:
-            prepare.build_vllm_model("phi-2")
-            r.launch("VLLM_USE_V1=0")
-            client.run("vllm phi-2".split())
-
-    def test_starcoder2_7b(self):
-        with Runner('lmi', 'starcoder2-7b') as r:
-            prepare.build_vllm_model("starcoder2-7b")
-            r.launch()
-            client.run("vllm starcoder2-7b".split())
-
     def test_gemma_2b(self):
         with Runner('lmi', 'gemma-2b') as r:
             prepare.build_vllm_model("gemma-2b")
@@ -968,12 +925,6 @@ def test_llama_vllm_nxdi_aot(self):
 @pytest.mark.gpu_4
 class TestCorrectnessTrtLlm:
 
-    def test_codestral_22b(self):
-        with Runner('tensorrt-llm', 'codestral-22b') as r:
-            prepare.build_correctness_model("trtllm-codestral-22b")
-            r.launch("CUDA_VISIBLE_DEVICES=0,1,2,3")
-            client.run("correctness trtllm-codestral-22b".split())
-
     def test_llama3_8b(self):
         with Runner('tensorrt-llm', 'llama3-8b') as r:
             prepare.build_correctness_model("trtllm-llama3-8b")