From 8bbbd5273a64638b5820aba3c927c814ae5bde6e Mon Sep 17 00:00:00 2001 From: Luke Van Drie Date: Tue, 7 Oct 2025 20:28:40 +0000 Subject: [PATCH] refactor: Make base client concrete and usable The openAIModelServerClient could not be instantiated directly as it declared no supported APIs. While this may have been intended to enforce it as a base class, making it concrete provides more flexibility. This change allows the client to be used with any generic OpenAI-compatible endpoint. It also centralizes the API list so redundant overrides can be removed from the vLLM, TGI, and SGLang subclasses, improving maintainability. --- inference_perf/client/modelserver/openai_client.py | 2 +- inference_perf/client/modelserver/sglang_client.py | 2 -- inference_perf/client/modelserver/tgi_client.py | 3 --- inference_perf/client/modelserver/vllm_client.py | 3 --- 4 files changed, 1 insertion(+), 9 deletions(-) diff --git a/inference_perf/client/modelserver/openai_client.py b/inference_perf/client/modelserver/openai_client.py index a66a2713..bdc5c8b4 100644 --- a/inference_perf/client/modelserver/openai_client.py +++ b/inference_perf/client/modelserver/openai_client.py @@ -130,7 +130,7 @@ async def process_request(self, data: InferenceAPIData, stage_id: int, scheduled ) def get_supported_apis(self) -> List[APIType]: - return [] + return [APIType.Completion, APIType.Chat] @abstractmethod def get_prometheus_metric_metadata(self) -> PrometheusMetricMetadata: diff --git a/inference_perf/client/modelserver/sglang_client.py b/inference_perf/client/modelserver/sglang_client.py index b7a2f7a0..05c36cc7 100644 --- a/inference_perf/client/modelserver/sglang_client.py +++ b/inference_perf/client/modelserver/sglang_client.py @@ -48,8 +48,6 @@ def __init__( ) self.metric_filters = [f"model_name='{model_name}'", *additional_filters] - def get_supported_apis(self) -> List[APIType]: - return [APIType.Completion, APIType.Chat] def get_prometheus_metric_metadata(self) -> PrometheusMetricMetadata: return PrometheusMetricMetadata( diff --git a/inference_perf/client/modelserver/tgi_client.py b/inference_perf/client/modelserver/tgi_client.py index 19200619..9ff20044 100644 --- a/inference_perf/client/modelserver/tgi_client.py +++ b/inference_perf/client/modelserver/tgi_client.py @@ -48,9 +48,6 @@ def __init__( ) self.metric_filters = additional_filters - def get_supported_apis(self) -> List[APIType]: - return [APIType.Completion, APIType.Chat] - def get_prometheus_metric_metadata(self) -> PrometheusMetricMetadata: return PrometheusMetricMetadata( avg_queue_length=ModelServerPrometheusMetric( diff --git a/inference_perf/client/modelserver/vllm_client.py b/inference_perf/client/modelserver/vllm_client.py index d84287d3..f00b21f8 100644 --- a/inference_perf/client/modelserver/vllm_client.py +++ b/inference_perf/client/modelserver/vllm_client.py @@ -48,9 +48,6 @@ def __init__( ) self.metric_filters = [f"model_name='{model_name}'", *additional_filters] - def get_supported_apis(self) -> List[APIType]: - return [APIType.Completion, APIType.Chat] - def get_prometheus_metric_metadata(self) -> PrometheusMetricMetadata: return PrometheusMetricMetadata( avg_queue_length=ModelServerPrometheusMetric(