Commit 55170ec

fix: llmapi-launch add add trtllm-bench test with engine building (#4… (NVIDIA#4550)
Signed-off-by: Superjomn <[email protected]>
1 parent 00e0837 commit 55170ec

6 files changed: +173 −21 lines changed


tensorrt_llm/executor/ipc.py

Lines changed: 5 additions & 5 deletions
@@ -37,7 +37,10 @@ def __init__(self,
         '''
         Parameters:
             address (tuple[str, Optional[bytes]], optional): The address (tcp-ip_port, hmac_auth_key) for the IPC. Defaults to None. If hmac_auth_key is None and use_hmac_encryption is False, the queue will not use HMAC encryption.
+            socket_type (int): The type of socket to use. Defaults to zmq.PAIR.
             is_server (bool): Whether the current process is the server or the client.
+            is_async (bool): Whether to use asyncio for the socket. Defaults to False.
+            name (str, optional): The name of the queue. Defaults to None.
             use_hmac_encryption (bool): Whether to use HMAC encryption for pickled data. Defaults to True.
         '''

@@ -57,10 +60,7 @@ def __init__(self,
         self.use_hmac_encryption = use_hmac_encryption

         # Check HMAC key condition
-        if self.use_hmac_encryption and self.is_server and self.hmac_key is not None:
-            raise ValueError(
-                "Server should not receive HMAC key when encryption is enabled")
-        elif self.use_hmac_encryption and not self.is_server and self.hmac_key is None:
+        if self.use_hmac_encryption and not self.is_server and self.hmac_key is None:
             raise ValueError(
                 "Client must receive HMAC key when encryption is enabled")
         elif not self.use_hmac_encryption and self.hmac_key is not None:

@@ -79,7 +79,7 @@ def __init__(self,
                 f"Server [{name}] bound to {self.address_endpoint} in {self.socket_type_str[socket_type]}\n",
                 "green")

-            if self.use_hmac_encryption:
+            if self.use_hmac_encryption and not self.hmac_key:
                 # Initialize HMAC key for pickle encryption
                 logger.info(f"Generating a new HMAC key for server {self.name}")
                 self.hmac_key = os.urandom(32)
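
The net effect of these ipc.py changes: a queue server may now be handed a pre-shared HMAC key instead of being forbidden from receiving one; it only falls back to generating a fresh 32-byte key when none was supplied, while a client still must receive the key explicitly. Below is a minimal sketch of that key-handling logic, not the actual ZeroMqQueue implementation; the helper name and the second error message are illustrative.

import os

def resolve_hmac_key(is_server: bool, use_hmac_encryption: bool,
                     hmac_key: bytes | None) -> bytes | None:
    # Sketch only: condenses the checks and the server-side key generation
    # from the diff above into one helper for illustration.
    if use_hmac_encryption and not is_server and hmac_key is None:
        raise ValueError(
            "Client must receive HMAC key when encryption is enabled")
    if not use_hmac_encryption and hmac_key is not None:
        raise ValueError("HMAC key provided but encryption is disabled")  # illustrative message
    if use_hmac_encryption and is_server and not hmac_key:
        hmac_key = os.urandom(32)  # server generates a key only when none was supplied
    return hmac_key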

tensorrt_llm/executor/utils.py

Lines changed: 7 additions & 4 deletions
@@ -23,6 +23,12 @@ def get_spawn_proxy_process_ipc_addr_env() -> str | None:
     return os.getenv("TLLM_SPAWN_PROXY_PROCESS_IPC_ADDR")


+def get_spawn_proxy_process_ipc_hmac_key_env() -> bytes | None:
+    ''' Get the HMAC key for the spawn proxy process dynamically. '''
+    if key := os.getenv("TLLM_SPAWN_PROXY_PROCESS_IPC_HMAC_KEY"):
+        return bytes.fromhex(key)
+
+
 def get_spawn_proxy_process_env() -> bool:
     ''' Get the environment variable for the spawn proxy process dynamically. '''
     return os.getenv("TLLM_SPAWN_PROXY_PROCESS") == "1"

@@ -42,10 +48,7 @@ def create_mpi_comm_session(
         print_colored_debug(
             f"Using RemoteMpiPoolSessionClient to bind to external MPI processes at {get_spawn_proxy_process_ipc_addr_env()}\n",
             "yellow")
-        hmac_key = os.getenv("TLLM_SPAWN_PROXY_PROCESS_IPC_HMAC_KEY")
-        # Convert the hex string to bytes
-        if hmac_key is not None:
-            hmac_key = bytes.fromhex(hmac_key)
+        hmac_key = get_spawn_proxy_process_ipc_hmac_key_env()
         return RemoteMpiCommSessionClient(
             addr=get_spawn_proxy_process_ipc_addr_env(), hmac_key=hmac_key)
     else:
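
A quick, hypothetical round-trip of the new helper (the environment variable name and the hex encoding come from the diff above; the surrounding script is illustrative only). Both mgmn_leader_node.py (server side) and create_mpi_comm_session (client side) now read the key through this helper, so both ends of the remote MPI session see the same secret.

import os
from tensorrt_llm.executor.utils import get_spawn_proxy_process_ipc_hmac_key_env

key = os.urandom(32)
os.environ["TLLM_SPAWN_PROXY_PROCESS_IPC_HMAC_KEY"] = key.hex()
assert get_spawn_proxy_process_ipc_hmac_key_env() == key  # hex string decoded back to bytes

del os.environ["TLLM_SPAWN_PROXY_PROCESS_IPC_HMAC_KEY"]
assert get_spawn_proxy_process_ipc_hmac_key_env() is None  # unset -> implicit None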

tensorrt_llm/llmapi/mgmn_leader_node.py

Lines changed: 4 additions & 1 deletion
@@ -3,7 +3,9 @@
 MPI Proxy process to connect and get the MPI task to run.
 '''
 from tensorrt_llm._utils import mpi_world_size
-from tensorrt_llm.executor.utils import get_spawn_proxy_process_ipc_addr_env
+from tensorrt_llm.executor.utils import (
+    get_spawn_proxy_process_ipc_addr_env,
+    get_spawn_proxy_process_ipc_hmac_key_env)
 from tensorrt_llm.llmapi.mpi_session import RemoteMpiCommSessionServer
 from tensorrt_llm.llmapi.utils import print_colored_debug

@@ -17,6 +19,7 @@ def launch_server_main(sub_comm=None):
         comm=sub_comm,
         n_workers=num_ranks,
         addr=get_spawn_proxy_process_ipc_addr_env(),
+        hmac_key=get_spawn_proxy_process_ipc_hmac_key_env(),
         is_comm=True)
     print_colored_debug(
         f"MPI Comm Server started at {get_spawn_proxy_process_ipc_addr_env()}")

tests/integration/defs/test_e2e.py

Lines changed: 155 additions & 10 deletions
@@ -19,7 +19,7 @@
 import sys
 import tempfile
 from pathlib import Path
-from typing import Optional, Tuple, Union
+from typing import Any, Optional, Tuple, Union

 import pytest
 import yaml

@@ -434,6 +434,148 @@ def test_qwen_e2e_cpprunner_large_new_tokens(model_name, model_path, llm_venv,
     ), f"Found zero length in sequence_lengths tensor: {seq_lengths}"


+# TODO replace the trtllm_bench_prolog
+class BenchRunner:
+
+    def __init__(self,
+                 llm_root: str,
+                 llm_venv: Any,
+                 model_subdir: str,
+                 model_name: str,
+                 streaming: bool,
+                 tp_size: int,
+                 use_pytorch_backend: bool = False,
+                 skip_engine_build: bool = False,
+                 quant: Optional[str] = None,
+                 extra_llm_api_options: Optional[str] = None,
+                 use_mpirun: bool = False):
+
+        llm_models = llm_models_root()
+        assert llm_models is not None
+        self.llm_root = llm_root
+        self.llm_venv = llm_venv
+        self.model_path = Path(llm_models, model_subdir).absolute()
+        self.model_name = model_name
+        self.quant = quant
+        self.streaming = streaming
+        self.skip_engine_build = skip_engine_build
+        self.use_pytorch_backend = use_pytorch_backend
+        self.use_mpirun = use_mpirun
+        self.tp_size = tp_size
+        self.quant_name = self.quant if self.quant is not None else "FP16"
+        self.extra_llm_api_options = extra_llm_api_options
+
+        self.work_dir = Path(tempfile.TemporaryDirectory().name)
+
+        self.dataset_path = os.path.join(self.work_dir, f"data.txt")
+        if self.use_mpirun:
+            self.mpirun_cmd = f"mpirun --allow-run-as-root -n {self.tp_size} trtllm-llmapi-launch"
+        else:
+            self.mpirun_cmd = ""
+        self.engine_path = None
+
+    def __call__(self):
+        self.prepare_dataset()
+        if not (self.skip_engine_build or self.use_pytorch_backend):
+            self.build_engine()
+        self.run_bench()
+
+    def prepare_dataset(self):
+        dataset_tool = Path(self.llm_root, "benchmarks", "cpp",
+                            "prepare_dataset.py")
+
+        # Generate a small dataset to run a test.
+        self.work_dir.mkdir(parents=True)
+        command = [
+            f"{dataset_tool.resolve()}",
+            "--stdout",
+            "--tokenizer",
+            f"{self.model_path}",
+            "token-norm-dist",
+            "--input-mean",
+            "128",
+            "--output-mean",
+            "128",
+            "--input-stdev",
+            "0",
+            "--output-stdev",
+            "0",
+            "--num-requests",
+            "10",
+        ]
+        print(f"Running command: {' '.join(command)}")
+        dataset_output = self.llm_venv.run_cmd(
+            command,
+            caller=check_output,
+        )
+        # Grab the stdout and write it to a dataset file for passing to suite.
+        with open(self.dataset_path, "w") as dataset:
+            dataset.write(dataset_output)
+
+    def build_engine(self):
+        if self.skip_engine_build:
+            return
+
+        build_cmd = \
+            f"{self.mpirun_cmd} " \
+            f"trtllm-bench " \
+            f"--model {self.model_name} " \
+            f"--model_path {self.model_path} " \
+            f"--workspace {self.work_dir} " \
+            f"build --tp_size {self.tp_size}"
+
+        if self.quant is not None:
+            build_cmd = f"{build_cmd} --quantization {self.quant}"
+
+        build_cmd = f"{build_cmd} --dataset {self.dataset_path}"
+        build_output = check_output(build_cmd,
+                                    shell=True,
+                                    env=self.llm_venv._new_env)
+
+        for line in build_output.split("\n")[::-1]:
+            if line.startswith("ENGINE SAVED:"):
+                self.engine_path = Path(line.split(":")[1])
+                break
+
+    def run_bench(self):
+        streaming = "--streaming" if self.streaming else ""
+        benchmark_cmd = \
+            f"{self.mpirun_cmd} " \
+            f"trtllm-bench --model {self.model_name} --model_path {self.model_path} " \
+            f"throughput " \
+            f"--tp {self.tp_size} "
+        if self.engine_path:
+            benchmark_cmd += f"--engine_dir {self.engine_path} "
+        benchmark_cmd += f" --dataset {self.dataset_path} {streaming}"
+
+        if self.use_pytorch_backend:
+            benchmark_cmd += " --backend pytorch"
+
+        if self.extra_llm_api_options:
+            benchmark_cmd += f" --extra_llm_api_options {self.extra_llm_api_options}"
+        check_call(benchmark_cmd, shell=True, env=self.llm_venv._new_env)
+
+
+@pytest.mark.parametrize("model_name", ["meta-llama/Meta-Llama-3-8B-Instruct"],
+                         ids=["llama3-8b"])
+@pytest.mark.parametrize("model_subdir",
+                         ["llama-models-v3/llama-v3-8b-instruct-hf"],
+                         ids=["llama-v3"])
+@pytest.mark.parametrize("use_pytorch_backend", [True, False],
+                         ids=["pytorch_backend", "trt_backend"])
+def test_trtllm_bench_llmapi_launch(llm_root, llm_venv, model_name,
+                                    model_subdir, use_pytorch_backend):
+    runner = BenchRunner(llm_root=llm_root,
+                         llm_venv=llm_venv,
+                         model_name=model_name,
+                         model_subdir=model_subdir,
+                         streaming=False,
+                         use_pytorch_backend=use_pytorch_backend,
+                         use_mpirun=True,
+                         tp_size=2)
+    runner()
+
+
 def trtllm_bench_prolog(
         llm_root,
         llm_venv,

@@ -664,14 +806,14 @@ def test_trtllm_bench_mgmn(llm_root, llm_venv):
     model_name = "meta-llama/Llama-3.1-8B"
     llama_model_dir = Path(
         llm_models_root()) / "llama-3.1-model/Llama-3.1-8B-Instruct"
-    dataset_path = trtllm_bench_prolog(llm_root,
-                                       llm_venv,
-                                       engine_dir=None,
-                                       model_subdir=llama_model_dir,
-                                       model_name=model_name,
-                                       quant=None,
-                                       streaming=False,
-                                       skip_engine_build=True)
+    _, _, dataset_path = trtllm_bench_prolog(llm_root,
+                                             llm_venv,
+                                             engine_dir=None,
+                                             model_subdir=llama_model_dir,
+                                             model_name=model_name,
+                                             quant=None,
+                                             streaming=False,
+                                             skip_engine_build=True)

     benchmark_cmd = \
         f"mpirun -n 2 trtllm-llmapi-launch trtllm-bench --model {model_name} " \

@@ -685,7 +827,10 @@ def test_trtllm_bench_mgmn(llm_root, llm_venv):
                                     dir="./",
                                     delete=True,
                                     delete_on_close=True) as running_log:
-        check_call(benchmark_cmd, shell=True, stdout=running_log)
+        check_call(benchmark_cmd,
+                   shell=True,
+                   running_log=running_log,
+                   env=llm_venv._new_env)
         _check_mem_usage(running_log, [30, 0, 0, 0])

691836

tests/integration/test_lists/test-db/l0_dgx_h100.yml

Lines changed: 2 additions & 0 deletions
@@ -298,3 +298,5 @@ l0_dgx_h100:
 - examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-t5-small-float16-enable_gemm_plugin-enable_attention_plugin-disable_paged_kv_cache-tp:2-pp:2-nb:1-disable_fp8]
 - examples/test_gpt.py::test_llm_gpt2_next_prompt_tuning[use_py_session-tp2]
 - unittest/llmapi/apps/_test_openai_multi_gpu.py -m "part0"
+- test_e2e.py::test_trtllm_bench_llmapi_launch[pytorch_backend-llama-v3-llama3-8b]
+- test_e2e.py::test_trtllm_bench_llmapi_launch[trt_backend-llama-v3-llama3-8b]

tests/integration/test_lists/waives.txt

Lines changed: 0 additions & 1 deletion
@@ -378,7 +378,6 @@ examples/test_eagle.py::test_mistral_eagle_1gpu[mistral-7b-v0.1-eagle2] SKIP (ht
 examples/test_multimodal.py::test_llm_multimodal_general[neva-22b-pp:1-tp:1-bfloat16-bs:8-cpp_e2e:False-nb:1] SKIP (https://nvbugs/5220761)
 examples/test_multimodal.py::test_llm_multimodal_general[neva-22b-pp:1-tp:1-bfloat16-bs:8-cpp_e2e:True-nb:1] SKIP (https://nvbugs/5220761)
 examples/test_cli_flow.py::TestSantacoder::test_auto_dtype SKIP (https://nvbugs/5219531)
-test_e2e.py::test_trtllm_bench_mgmn SKIP (https://nvbugs/5220766)
 examples/test_medusa.py::test_llama_medusa_1gpu[llama-3.1-8b] SKIP (https://nvbugs/5219535)
 examples/test_phi.py::test_llm_phi_quantization_1gpu[Phi-4-mini-instruct-fp8-bfloat16] SKIP (https://nvbugspro.nvidia.com/bug/5226339)
 examples/test_multimodal.py::test_llm_multimodal_general[neva-22b-pp:1-tp:1-bfloat16-bs:1-cpp_e2e:True-nb:1] SKIP (https://nvbugs/5227342)
