From dd328e90b5283073ceafe722bb01a0e0decfe519 Mon Sep 17 00:00:00 2001
From: David Kirov
Date: Wed, 10 Sep 2025 13:38:09 +0200
Subject: [PATCH 1/9] Add logs config section

---
 hugging_face_tgi/README.md                    | 32 +++++++++++++++++++
 .../assets/configuration/spec.yaml            |  5 +++
 .../hugging_face_tgi/data/conf.yaml.example   | 20 ++++++++++++
 3 files changed, 57 insertions(+)

diff --git a/hugging_face_tgi/README.md b/hugging_face_tgi/README.md
index 9b0843218b382..b321c7df3ccf2 100644
--- a/hugging_face_tgi/README.md
+++ b/hugging_face_tgi/README.md
@@ -36,6 +36,38 @@ No additional installation is needed on your server.
 
 3. [Restart the Agent][5].
 
+#### Logs
+
+The Hugging Face TGI integration can collect logs from the server container and forward them to Datadog.
+
+1. Collecting logs is disabled by default in the Datadog Agent. Enable it in your `datadog.yaml` file:
+
+    ```yaml
+    logs_enabled: true
+    ```
+
+2. Uncomment and edit the logs configuration block in your `hugging_face_tgi.d/conf.yaml` file. Here's an example:
+
+    ```yaml
+    logs:
+      - type: docker
+        source: hugging_face_tgi
+        service: hugging_face_tgi
+    ```
+
+Collecting logs is disabled by default in the Datadog Agent. To enable it, see [Kubernetes Log Collection][13].
+
+Then, set Log Integrations as pod annotations. This can also be configured with a file, a configmap, or a key-value store. For more information, see the configuration section of [Kubernetes Log Collection][14].
+
 ### Validation
 
 [Run the Agent's status subcommand][6] and look for `hugging_face_tgi` under the Checks section.

diff --git a/hugging_face_tgi/assets/configuration/spec.yaml b/hugging_face_tgi/assets/configuration/spec.yaml
index 083fa855dba70..b23413f3b5d11 100644
--- a/hugging_face_tgi/assets/configuration/spec.yaml
+++ b/hugging_face_tgi/assets/configuration/spec.yaml
@@ -13,3 +13,8 @@ files:
         openmetrics_endpoint.description: |
           Endpoint exposing Hugging Face TGI's Prometheus metrics. For more information, refer to
           https://huggingface.co/docs/text-generation-inference/en/basic_tutorials/monitoring
+  - template: logs
+    example:
+      - type: docker
+        source: hugging_face_tgi
+        service:

diff --git a/hugging_face_tgi/datadog_checks/hugging_face_tgi/data/conf.yaml.example b/hugging_face_tgi/datadog_checks/hugging_face_tgi/data/conf.yaml.example
index 24b46287dcae1..0662d391ec018 100644
--- a/hugging_face_tgi/datadog_checks/hugging_face_tgi/data/conf.yaml.example
+++ b/hugging_face_tgi/datadog_checks/hugging_face_tgi/data/conf.yaml.example
@@ -640,3 +640,23 @@ instances:
     #   -
     # exclude:
     #   -
+## Log Section
+##
+## type - required - Type of log input source (tcp / udp / file / windows_event).
+## port / path / channel_path - required - Set port if type is tcp or udp.
+##                                          Set path if type is file.
+##                                          Set channel_path if type is windows_event.
+## source - required - Attribute that defines which integration sent the logs.
+## encoding - optional - For file specifies the file encoding. Default is utf-8. Other
+##                       possible values are utf-16-le and utf-16-be.
+## service - optional - The name of the service that generates the log.
+##                      Overrides any `service` defined in the `init_config` section.
+## tags - optional - Add tags to the collected logs.
+##
+## Discover Datadog log collection: https://docs.datadoghq.com/logs/log_collection/
+#
+# logs:
+#   - type: docker
+#     source: hugging_face_tgi
+#     service:

From 00df174b0fba1662c1c380b095f32791d5d525b5 Mon Sep 17 00:00:00 2001
From: David Kirov
Date: Wed, 10 Sep 2025 13:55:38 +0200
Subject: [PATCH 2/9] Add missing README links

---
 hugging_face_tgi/README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/hugging_face_tgi/README.md b/hugging_face_tgi/README.md
index b321c7df3ccf2..1ca14ee7c2746 100644
--- a/hugging_face_tgi/README.md
+++ b/hugging_face_tgi/README.md
@@ -112,3 +112,5 @@ Need help? Contact [Datadog support][9].
 [9]: https://docs.datadoghq.com/help/
 [10]: https://huggingface.co/docs/text-generation-inference/en/basic_tutorials/monitoring
 [11]: https://docs.datadoghq.com/agent/configuration/agent-configuration-files/#agent-configuration-directory
+[13]: https://docs.datadoghq.com/agent/kubernetes/log/#setup
+[14]: https://docs.datadoghq.com/agent/kubernetes/log/#configuration

From d3d5b02c77b210e6c203545564a3ddf9f240ef03 Mon Sep 17 00:00:00 2001
From: David Kirov
Date: Mon, 15 Sep 2025 14:37:04 +0200
Subject: [PATCH 3/9] Add pipeline

---
 hugging_face_tgi/README.md                    |  21 +-
 .../assets/logs/hugging_face_tgi.yaml         | 272 ++++++++++++++++++
 .../assets/logs/hugging_face_tgi_tests.yaml   |  96 +++++++
 3 files changed, 387 insertions(+), 2 deletions(-)
 create mode 100644 hugging_face_tgi/assets/logs/hugging_face_tgi.yaml
 create mode 100644 hugging_face_tgi/assets/logs/hugging_face_tgi_tests.yaml

diff --git a/hugging_face_tgi/README.md b/hugging_face_tgi/README.md
index 1ca14ee7c2746..8f5a63c9b28f9 100644
--- a/hugging_face_tgi/README.md
+++ b/hugging_face_tgi/README.md
@@ -38,7 +38,7 @@ No additional installation is needed on your server.
 
 #### Logs
 
-The Hugging Face TGI integration can collect logs from the server container and forward them to Datadog.
+The Hugging Face TGI integration can collect logs from the server container and forward them to Datadog. The TGI server container needs to be started with the environment variable `NO_COLOR=1` and the option `--json-output` for the log output to be correctly parsed by Datadog. If launching the server in this manner is not possible, the logs ingested by Datadog will likely be malformed; refer to the troubleshooting section.
@@ -55,7 +55,8 @@ The Hugging Face TGI integration can collect logs from the server container and
     logs:
       - type: docker
         source: hugging_face_tgi
-        service: hugging_face_tgi
+        service: text-generation-inference
+        auto_multi_line_detection: true
     ```
@@ -98,6 +99,22 @@ See [service_checks.json][8] for a list of service checks provided by this integ
 
 In containerized environments, ensure that the Agent has network access to the TGI metrics endpoint specified in the `hugging_face_tgi.d/conf.yaml` file.
 
+If you wish to ingest non-JSON TGI logs, use the following logs configuration:
+
+```yaml
+  logs:
+    - type: docker
+      source: hugging_face_tgi
+      service: text-generation-inference
+      auto_multi_line_detection: true
+      log_processing_rules:
+        - type: mask_sequences
+          name: strip_ansi
+          pattern: "\\x1B\\[[0-9;]*m"
+          replace_placeholder: ""
+
+```
+
 Need help? Contact [Datadog support][9].
diff --git a/hugging_face_tgi/assets/logs/hugging_face_tgi.yaml b/hugging_face_tgi/assets/logs/hugging_face_tgi.yaml new file mode 100644 index 0000000000000..c5a687d3db3e6 --- /dev/null +++ b/hugging_face_tgi/assets/logs/hugging_face_tgi.yaml @@ -0,0 +1,272 @@ +id: hugging_face_tgi +metric_id: hugging_face_tgi +backend_only: false +facets: null +pipeline: + type: pipeline + name: Hugging Face TGI + enabled: true + filter: + query: source:hugging_face_tgi + processors: + - type: grok-parser + name: Non JSON and no color + enabled: true + source: message + samples: + - '2025-09-09T11:29:51.795563Z INFO + generate_stream{parameters=GenerateParameters { best_of: None, + temperature: None, repetition_penalty: None, frequency_penalty: None, + top_k: None, top_p: None, typical_p: None, do_sample: false, + max_new_tokens: Some(20), return_full_text: None, stop: [], truncate: + None, watermark: false, details: false, decoder_input_details: false, + seed: None, top_n_tokens: None, grammar: None, adapter_id: None } + total_time="1.194364886s" validation_time="204.821µs" + queue_time="53.525µs" inference_time="1.194106715s" + time_per_token="59.705335ms" seed="None"}: + text_generation_router::server: router/src/server.rs:637: Success' + - '2025-09-09T11:28:03.840209Z ERROR + chat_completions{parameters="GenerateParameters { best_of: None, + temperature: None, repetition_penalty: None, frequency_penalty: None, + top_k: None, top_p: None, typical_p: None, do_sample: true, + max_new_tokens: Some(20), return_full_text: None, stop: [], truncate: + None, watermark: false, details: true, decoder_input_details: false, + seed: None, top_n_tokens: None, grammar: None, adapter_id: None + }"}:async_stream:generate_stream: text_generation_router::infer: + router/src/infer/mod.rs:126: `inputs` tokens + `max_new_tokens` must + be <= 512. 
Given: 1864 `inputs` tokens and 20 `max_new_tokens`' + - "2025-09-08T15:41:01.566464Z WARN text_generation_router::server: + router/src/server.rs:1906: Invalid hostname, defaulting to 0.0.0.0" + - "2025-09-08T15:38:42.366067Z INFO download: text_generation_launcher: + Starting check and download process for + teknium/OpenHermes-2.5-Mistral-7B" + - |- + 2025-09-08T15:38:40.500145Z INFO text_generation_launcher: Args { + model_id: "teknium/OpenHermes-2.5-Mistral-7B", + revision: None, + validation_workers: 2, + sharded: None, + num_shard: None, + quantize: None, + speculate: None, + dtype: None, + kv_cache_dtype: None, + trust_remote_code: false, + max_concurrent_requests: 128, + max_best_of: 2, + max_stop_sequences: 4, + max_top_n_tokens: 5, + max_input_tokens: None, + max_input_length: None, + max_total_tokens: None, + waiting_served_ratio: 0.3, + max_batch_prefill_tokens: Some( + 512, + ), + max_batch_total_tokens: None, + max_waiting_tokens: 20, + max_batch_size: None, + cuda_graphs: None, + hostname: "ip-172-31-21-18", + port: 80, + prometheus_port: 9000, + shard_uds_path: "/tmp/text-generation-server", + master_addr: "localhost", + master_port: 29500, + huggingface_hub_cache: None, + weights_cache_override: None, + disable_custom_kernels: false, + cuda_memory_fraction: 1.0, + rope_scaling: None, + rope_factor: None, + json_output: false, + otlp_endpoint: None, + otlp_service_name: "text-generation-inference.router", + cors_allow_origin: [], + api_key: None, + watermark_gamma: None, + watermark_delta: None, + ngrok: false, + ngrok_authtoken: None, + ngrok_edge: None, + tokenizer_config_path: None, + disable_grammar_support: false, + env: false, + max_client_batch_size: 4, + lora_adapters: None, + usage_stats: On, + payload_limit: 2000000, + enable_prefill_logprobs: false, + graceful_termination_timeout: 90, + } + grok: + supportRules: >- + tgi_date %{date("yyyy-MM-dd'T'HH:mm:ss.SSSSSS'Z'"):date} + + success_params (\s+total_time="%{regex("(?<=\")[A-z0-9.µ]*(?=\")"):hugging_face_tgi.total_time}")?(\s+validation_time="%{regex("(?<=\")[A-z0-9.µ]*(?=\")"):hugging_face_tgi.validation_time}")?(\s+queue_time="%{regex("(?<=\")[A-z0-9.µ]*(?=\")"):hugging_face_tgi.queue_time}")?(\s+inference_time="%{regex("(?<=\")[A-z0-9.µ]*(?=\")"):hugging_face_tgi.inference_time}")?(\s+time_per_token="%{regex("(?<=\")[A-z0-9.µ]*(?=\")"):hugging_face_tgi.time_per_token}")?(\s+seed="%{regex("(?<=\")[A-z0-9.µ]*(?=\")"):hugging_face_tgi.seed}")? + + log_tail %{word:hugging_face_tgi.component}(::%{word:hugging_face_tgi.sub_component})?:\s+(%{regex("[A-z0-9/\\.:]*(?=: )"):hugging_face_tgi.file}:\s+)?%{data:message} + + general_params parameters="?%{regex(".*\\s+}"):hugging_face_tgi.parameters} + + color (%{regex("\\[0-9]*m")})? 
+ matchRules: >- + full_log + %{tgi_date}\s+%{notSpace:status}\s+%{word:hugging_face_tgi.operation_type}\{%{general_params}"?%{success_params}\s*\}(:%{regex("[A-z0-9/\\.:]*(?=: + )"):hugging_face_tgi.operation_sub_type})?:\s+%{log_tail} + + + init_log %{tgi_date}\s+%{notSpace:status}\s+%{word:hugging_face_tgi.component}:\s+Args\s+\{%{data:hugging_face_tgi:keyvalue(": ","()\\[\\]",""," ,")}\s+\} + + + short_log %{tgi_date}\s+%{notSpace:status}\s+(download:\s+)?%{log_tail} + - type: status-remapper + name: Status Remapper + enabled: true + sources: + - status + - type: date-remapper + name: Date Remapper + enabled: true + sources: + - date + - type: attribute-remapper + name: Span + enabled: true + sources: + - span + sourceType: attribute + target: hugging_face_tgi + targetType: attribute + preserveSource: false + overrideOnConflict: false + - type: attribute-remapper + name: Spans + enabled: true + sources: + - spans + sourceType: attribute + target: hugging_face_tgi.spans + targetType: attribute + preserveSource: false + overrideOnConflict: false + - type: attribute-remapper + name: Filename + enabled: true + sources: + - filename + sourceType: attribute + target: hugging_face_tgi.filename + targetType: attribute + preserveSource: false + overrideOnConflict: false + - type: attribute-remapper + name: Line number + enabled: true + sources: + - line_number + sourceType: attribute + target: hugging_face_tgi.line_number + targetType: attribute + preserveSource: false + overrideOnConflict: false + - type: attribute-remapper + name: Target + enabled: true + sources: + - target + sourceType: attribute + target: hugging_face_tgi.target + targetType: attribute + preserveSource: false + overrideOnConflict: false + - type: message-remapper + name: Message Remapper + enabled: true + sources: + - message + - fields.message + - type: grok-parser + name: JSON init + enabled: true + source: message + samples: + - |- + Args { + model_id: "teknium/OpenHermes-2.5-Mistral-7B", + revision: None, + validation_workers: 2, + sharded: None, + num_shard: None, + quantize: None, + speculate: None, + dtype: None, + kv_cache_dtype: None, + trust_remote_code: false, + max_concurrent_requests: 128, + max_best_of: 2, + max_stop_sequences: 4, + max_top_n_tokens: 5, + max_input_tokens: None, + max_input_length: None, + max_total_tokens: None, + waiting_served_ratio: 0.3, + max_batch_prefill_tokens: Some( + 512, + ), + max_batch_total_tokens: None, + max_waiting_tokens: 20, + max_batch_size: None, + cuda_graphs: None, + hostname: "ip-172-31-21-18", + port: 80, + prometheus_port: 9000, + shard_uds_path: "/tmp/text-generation-server", + master_addr: "localhost", + master_port: 29500, + huggingface_hub_cache: None, + weights_cache_override: None, + disable_custom_kernels: false, + cuda_memory_fraction: 1.0, + rope_scaling: None, + rope_factor: None, + json_output: true, + otlp_endpoint: None, + otlp_service_name: "text-generation-inference.router", + cors_allow_origin: [], + api_key: None, + watermark_gamma: None, + watermark_delta: None, + ngrok: false, + ngrok_authtoken: None, + ngrok_edge: None, + tokenizer_config_path: None, + disable_grammar_support: false, + env: false, + max_client_batch_size: 4, + lora_adapters: None, + usage_stats: On, + payload_limit: 2000000, + enable_prefill_logprobs: false, + graceful_termination_timeout: 90, + } + grok: + supportRules: "" + matchRules: 'rule Args\s+\{\s+%{data:hugging_face_tgi:keyvalue(": + ","()\\[\\]",""," ,")}\s+\}' + - type: grok-parser + name: Parameters + enabled: true + 
source: hugging_face_tgi.parameters + samples: + - "GenerateParameters { best_of: None, temperature: None, + repetition_penalty: None, frequency_penalty: None, top_k: None, top_p: + None, typical_p: None, do_sample: false, max_new_tokens: Some(20), + return_full_text: None, stop: [], truncate: None, watermark: false, + details: false, decoder_input_details: false, seed: None, + top_n_tokens: None, grammar: None, adapter_id: None }" + grok: + supportRules: "" + matchRules: 'rule + %{word:hugging_face_tgi.parameters.type}\s*\{\s+%{data:hugging_face_tgi.parameters:keyvalue(": + ","()\\[\\]",""," ,")}\s+\}' diff --git a/hugging_face_tgi/assets/logs/hugging_face_tgi_tests.yaml b/hugging_face_tgi/assets/logs/hugging_face_tgi_tests.yaml new file mode 100644 index 0000000000000..043f0c880b801 --- /dev/null +++ b/hugging_face_tgi/assets/logs/hugging_face_tgi_tests.yaml @@ -0,0 +1,96 @@ +id: hugging_face_tgi +tests: + - sample: '2025-09-09T11:29:51.795563Z INFO + generate_stream{parameters=GenerateParameters { best_of: None, + temperature: None, repetition_penalty: None, frequency_penalty: None, + top_k: None, top_p: None, typical_p: None, do_sample: false, + max_new_tokens: Some(20), return_full_text: None, stop: [], truncate: + None, watermark: false, details: false, decoder_input_details: false, + seed: None, top_n_tokens: None, grammar: None, adapter_id: None } + total_time="1.194364886s" validation_time="204.821µs" + queue_time="53.525µs" inference_time="1.194106715s" + time_per_token="59.705335ms" seed="None"}: text_generation_router::server: + router/src/server.rs:637: Success' + result: null + - sample: '2025-09-09T11:28:03.840209Z ERROR + chat_completions{parameters="GenerateParameters { best_of: None, + temperature: None, repetition_penalty: None, frequency_penalty: None, + top_k: None, top_p: None, typical_p: None, do_sample: true, + max_new_tokens: Some(20), return_full_text: None, stop: [], truncate: + None, watermark: false, details: true, decoder_input_details: false, seed: + None, top_n_tokens: None, grammar: None, adapter_id: None + }"}:async_stream:generate_stream: text_generation_router::infer: + router/src/infer/mod.rs:126: `inputs` tokens + `max_new_tokens` must be <= + 512. 
Given: 1864 `inputs` tokens and 20 `max_new_tokens`' + result: null + - sample: "2025-09-08T15:41:01.566464Z WARN text_generation_router::server: + router/src/server.rs:1906: Invalid hostname, defaulting to 0.0.0.0" + result: null + - sample: "2025-09-08T15:38:42.366067Z INFO download: text_generation_launcher: + Starting check and download process for teknium/OpenHermes-2.5-Mistral-7B" + result: null + - sample: |- + 2025-09-08T15:38:40.500145Z INFO text_generation_launcher: Args { + model_id: "teknium/OpenHermes-2.5-Mistral-7B", + revision: None, + validation_workers: 2, + sharded: None, + num_shard: None, + quantize: None, + speculate: None, + dtype: None, + kv_cache_dtype: None, + trust_remote_code: false, + max_concurrent_requests: 128, + max_best_of: 2, + max_stop_sequences: 4, + max_top_n_tokens: 5, + max_input_tokens: None, + max_input_length: None, + max_total_tokens: None, + waiting_served_ratio: 0.3, + max_batch_prefill_tokens: Some( + 512, + ), + max_batch_total_tokens: None, + max_waiting_tokens: 20, + max_batch_size: None, + cuda_graphs: None, + hostname: "ip-172-31-21-18", + port: 80, + prometheus_port: 9000, + shard_uds_path: "/tmp/text-generation-server", + master_addr: "localhost", + master_port: 29500, + huggingface_hub_cache: None, + weights_cache_override: None, + disable_custom_kernels: false, + cuda_memory_fraction: 1.0, + rope_scaling: None, + rope_factor: None, + json_output: false, + otlp_endpoint: None, + otlp_service_name: "text-generation-inference.router", + cors_allow_origin: [], + api_key: None, + watermark_gamma: None, + watermark_delta: None, + ngrok: false, + ngrok_authtoken: None, + ngrok_edge: None, + tokenizer_config_path: None, + disable_grammar_support: false, + env: false, + max_client_batch_size: 4, + lora_adapters: None, + usage_stats: On, + payload_limit: 2000000, + enable_prefill_logprobs: false, + graceful_termination_timeout: 90, + } + result: null + +# The `result` field should be left blank to start. Once you submit your log asset files with +# your integration pull-request in a Datadog GitHub repository, Datadog's validations will +# run your raw logs against your pipeline and return the result. If the result output in the +# validation is accurate, take the output and add it to the `result` field in your test YAML file. 
From bb56781e5b2abd15deff5177a3eeea8d098a875a Mon Sep 17 00:00:00 2001 From: David Kirov Date: Mon, 15 Sep 2025 16:50:06 +0200 Subject: [PATCH 4/9] Fix logs pipeline for CI --- .../assets/logs/hugging_face_tgi.yaml | 2 +- .../assets/logs/hugging_face_tgi_tests.yaml | 275 +++++++++++++++--- 2 files changed, 240 insertions(+), 37 deletions(-) diff --git a/hugging_face_tgi/assets/logs/hugging_face_tgi.yaml b/hugging_face_tgi/assets/logs/hugging_face_tgi.yaml index c5a687d3db3e6..42264cbdf06bb 100644 --- a/hugging_face_tgi/assets/logs/hugging_face_tgi.yaml +++ b/hugging_face_tgi/assets/logs/hugging_face_tgi.yaml @@ -1,5 +1,5 @@ id: hugging_face_tgi -metric_id: hugging_face_tgi +metric_id: hugging-face-tgi backend_only: false facets: null pipeline: diff --git a/hugging_face_tgi/assets/logs/hugging_face_tgi_tests.yaml b/hugging_face_tgi/assets/logs/hugging_face_tgi_tests.yaml index 043f0c880b801..ebe07ed822100 100644 --- a/hugging_face_tgi/assets/logs/hugging_face_tgi_tests.yaml +++ b/hugging_face_tgi/assets/logs/hugging_face_tgi_tests.yaml @@ -1,35 +1,240 @@ id: hugging_face_tgi tests: - - sample: '2025-09-09T11:29:51.795563Z INFO - generate_stream{parameters=GenerateParameters { best_of: None, - temperature: None, repetition_penalty: None, frequency_penalty: None, - top_k: None, top_p: None, typical_p: None, do_sample: false, - max_new_tokens: Some(20), return_full_text: None, stop: [], truncate: - None, watermark: false, details: false, decoder_input_details: false, - seed: None, top_n_tokens: None, grammar: None, adapter_id: None } - total_time="1.194364886s" validation_time="204.821µs" - queue_time="53.525µs" inference_time="1.194106715s" - time_per_token="59.705335ms" seed="None"}: text_generation_router::server: - router/src/server.rs:637: Success' - result: null - - sample: '2025-09-09T11:28:03.840209Z ERROR - chat_completions{parameters="GenerateParameters { best_of: None, - temperature: None, repetition_penalty: None, frequency_penalty: None, - top_k: None, top_p: None, typical_p: None, do_sample: true, - max_new_tokens: Some(20), return_full_text: None, stop: [], truncate: - None, watermark: false, details: true, decoder_input_details: false, seed: - None, top_n_tokens: None, grammar: None, adapter_id: None - }"}:async_stream:generate_stream: text_generation_router::infer: - router/src/infer/mod.rs:126: `inputs` tokens + `max_new_tokens` must be <= - 512. 
Given: 1864 `inputs` tokens and 20 `max_new_tokens`' - result: null - - sample: "2025-09-08T15:41:01.566464Z WARN text_generation_router::server: - router/src/server.rs:1906: Invalid hostname, defaulting to 0.0.0.0" - result: null - - sample: "2025-09-08T15:38:42.366067Z INFO download: text_generation_launcher: - Starting check and download process for teknium/OpenHermes-2.5-Mistral-7B" - result: null - - sample: |- + - + sample: "2025-09-09T11:29:51.795563Z INFO generate_stream{parameters=GenerateParameters { best_of: None, temperature: None, repetition_penalty: None, frequency_penalty: None, top_k: None, top_p: None, typical_p: None, do_sample: false, max_new_tokens: Some(20), return_full_text: None, stop: [], truncate: None, watermark: false, details: false, decoder_input_details: false, seed: None, top_n_tokens: None, grammar: None, adapter_id: None } total_time=\"1.194364886s\" validation_time=\"204.821µs\" queue_time=\"53.525µs\" inference_time=\"1.194106715s\" time_per_token=\"59.705335ms\" seed=\"None\"}: text_generation_router::server: router/src/server.rs:637: Success" + result: + custom: + date: 1757417391795 + hugging_face_tgi: + component: "text_generation_router" + file: "router/src/server.rs:637" + inference_time: "1.194106715s" + operation_type: "generate_stream" + parameters: + - + type: "GenerateParameters" + - + watermark: false + seed: "None" + do_sample: false + top_p: "None" + frequency_penalty: "None" + grammar: "None" + best_of: "None" + stop: "[]" + truncate: "None" + top_n_tokens: "None" + max_new_tokens: "Some(20)" + top_k: "None" + decoder_input_details: false + temperature: "None" + typical_p: "None" + details: false + adapter_id: "None" + return_full_text: "None" + repetition_penalty: "None" + queue_time: "53.525µs" + seed: "None" + sub_component: "server" + time_per_token: "59.705335ms" + total_time: "1.194364886s" + validation_time: "204.821µs" + status: "INFO" + message: "Success" + status: "info" + tags: + - "source:LOGS_SOURCE" + timestamp: 1757417391795 + - + sample: "2025-09-09T11:28:03.840209Z ERROR chat_completions{parameters=\"GenerateParameters { best_of: None, temperature: None, repetition_penalty: None, frequency_penalty: None, top_k: None, top_p: None, typical_p: None, do_sample: true, max_new_tokens: Some(20), return_full_text: None, stop: [], truncate: None, watermark: false, details: true, decoder_input_details: false, seed: None, top_n_tokens: None, grammar: None, adapter_id: None }\"}:async_stream:generate_stream: text_generation_router::infer: router/src/infer/mod.rs:126: `inputs` tokens + `max_new_tokens` must be <= 512. Given: 1864 `inputs` tokens and 20 `max_new_tokens`" + result: + custom: + date: 1757417283840 + hugging_face_tgi: + component: "text_generation_router" + file: "router/src/infer/mod.rs:126" + operation_sub_type: "async_stream:generate_stream" + operation_type: "chat_completions" + parameters: + - + type: "GenerateParameters" + - + watermark: false + seed: "None" + do_sample: true + top_p: "None" + frequency_penalty: "None" + grammar: "None" + best_of: "None" + stop: "[]" + truncate: "None" + top_n_tokens: "None" + max_new_tokens: "Some(20)" + top_k: "None" + decoder_input_details: false + temperature: "None" + typical_p: "None" + details: true + adapter_id: "None" + return_full_text: "None" + repetition_penalty: "None" + sub_component: "infer" + status: "ERROR" + message: "`inputs` tokens + `max_new_tokens` must be <= 512. 
Given: 1864 `inputs` tokens and 20 `max_new_tokens`" + status: "error" + tags: + - "source:LOGS_SOURCE" + timestamp: 1757417283840 + - + sample: "2025-09-08T15:41:01.566464Z WARN text_generation_router::server: router/src/server.rs:1906: Invalid hostname, defaulting to 0.0.0.0" + result: + custom: + date: 1757346061566 + hugging_face_tgi: + component: "text_generation_router" + file: "router/src/server.rs:1906" + sub_component: "server" + status: "WARN" + message: "Invalid hostname, defaulting to 0.0.0.0" + status: "warn" + tags: + - "source:LOGS_SOURCE" + timestamp: 1757346061566 + - + sample: "2025-09-08T15:38:42.366067Z INFO download: text_generation_launcher: Starting check and download process for teknium/OpenHermes-2.5-Mistral-7B" + result: + custom: + date: 1757345922366 + hugging_face_tgi: + component: "text_generation_launcher" + status: "INFO" + message: "Starting check and download process for teknium/OpenHermes-2.5-Mistral-7B" + status: "info" + tags: + - "source:LOGS_SOURCE" + timestamp: 1757345922366 + - + sample: |- + 2025-09-08T15:38:40.500145Z INFO text_generation_launcher: Args { + model_id: "teknium/OpenHermes-2.5-Mistral-7B", + revision: None, + validation_workers: 2, + sharded: None, + num_shard: None, + quantize: None, + speculate: None, + dtype: None, + kv_cache_dtype: None, + trust_remote_code: false, + max_concurrent_requests: 128, + max_best_of: 2, + max_stop_sequences: 4, + max_top_n_tokens: 5, + max_input_tokens: None, + max_input_length: None, + max_total_tokens: None, + waiting_served_ratio: 0.3, + max_batch_prefill_tokens: Some( + 512, + ), + max_batch_total_tokens: None, + max_waiting_tokens: 20, + max_batch_size: None, + cuda_graphs: None, + hostname: "ip-172-31-21-18", + port: 80, + prometheus_port: 9000, + shard_uds_path: "/tmp/text-generation-server", + master_addr: "localhost", + master_port: 29500, + huggingface_hub_cache: None, + weights_cache_override: None, + disable_custom_kernels: false, + cuda_memory_fraction: 1.0, + rope_scaling: None, + rope_factor: None, + json_output: false, + otlp_endpoint: None, + otlp_service_name: "text-generation-inference.router", + cors_allow_origin: [], + api_key: None, + watermark_gamma: None, + watermark_delta: None, + ngrok: false, + ngrok_authtoken: None, + ngrok_edge: None, + tokenizer_config_path: None, + disable_grammar_support: false, + env: false, + max_client_batch_size: 4, + lora_adapters: None, + usage_stats: On, + payload_limit: 2000000, + enable_prefill_logprobs: false, + graceful_termination_timeout: 90, + } + result: + custom: + date: 1757345920500 + hugging_face_tgi: + - + component: "text_generation_launcher" + - + validation_workers: 2 + cuda_graphs: "None" + usage_stats: "On" + sharded: "None" + trust_remote_code: false + max_total_tokens: "None" + hostname: "ip-172-31-21-18" + max_input_length: "None" + max_batch_size: "None" + shard_uds_path: "/tmp/text-generation-server" + waiting_served_ratio: 0.3 + num_shard: "None" + graceful_termination_timeout: 90 + json_output: false + dtype: "None" + kv_cache_dtype: "None" + payload_limit: 2000000 + max_stop_sequences: 4 + tokenizer_config_path: "None" + revision: "None" + weights_cache_override: "None" + lora_adapters: "None" + port: 80 + max_input_tokens: "None" + cuda_memory_fraction: 1.0 + otlp_service_name: "text-generation-inference.router" + max_top_n_tokens: 5 + rope_factor: "None" + watermark_delta: "None" + ngrok: false + disable_grammar_support: false + max_waiting_tokens: 20 + quantize: "None" + disable_custom_kernels: false + 
max_concurrent_requests: 128 + max_client_batch_size: 4 + rope_scaling: "None" + huggingface_hub_cache: "None" + speculate: "None" + max_best_of: 2 + model_id: "teknium/OpenHermes-2.5-Mistral-7B" + env: false + master_addr: "localhost" + watermark_gamma: "None" + ngrok_authtoken: "None" + api_key: "None" + prometheus_port: 9000 + ngrok_edge: "None" + cors_allow_origin: "[]" + master_port: 29500 + otlp_endpoint: "None" + enable_prefill_logprobs: false + max_batch_total_tokens: "None" + status: "INFO" + message: |- 2025-09-08T15:38:40.500145Z INFO text_generation_launcher: Args { model_id: "teknium/OpenHermes-2.5-Mistral-7B", revision: None, @@ -88,9 +293,7 @@ tests: enable_prefill_logprobs: false, graceful_termination_timeout: 90, } - result: null - -# The `result` field should be left blank to start. Once you submit your log asset files with -# your integration pull-request in a Datadog GitHub repository, Datadog's validations will -# run your raw logs against your pipeline and return the result. If the result output in the -# validation is accurate, take the output and add it to the `result` field in your test YAML file. + status: "info" + tags: + - "source:LOGS_SOURCE" + timestamp: 1757345920500 From 75f5587f900c16314aa28fccb4ea55cb20121f79 Mon Sep 17 00:00:00 2001 From: David Kirov Date: Mon, 15 Sep 2025 17:17:18 +0200 Subject: [PATCH 5/9] Add changelog --- hugging_face_tgi/changelog.d/21314.added | 1 + 1 file changed, 1 insertion(+) create mode 100644 hugging_face_tgi/changelog.d/21314.added diff --git a/hugging_face_tgi/changelog.d/21314.added b/hugging_face_tgi/changelog.d/21314.added new file mode 100644 index 0000000000000..22bb9dfa0e9b9 --- /dev/null +++ b/hugging_face_tgi/changelog.d/21314.added @@ -0,0 +1 @@ +Add logs From 494821a66e82f601d11b357042f4d454ffb79eb9 Mon Sep 17 00:00:00 2001 From: David Kirov Date: Tue, 16 Sep 2025 10:59:46 +0200 Subject: [PATCH 6/9] Add saved views --- .../saved_views/error_logs_overview.json | 24 +++++++++++++++++++ .../assets/saved_views/logs_overview.json | 24 +++++++++++++++++++ hugging_face_tgi/manifest.json | 5 +++- 3 files changed, 52 insertions(+), 1 deletion(-) create mode 100644 hugging_face_tgi/assets/saved_views/error_logs_overview.json create mode 100644 hugging_face_tgi/assets/saved_views/logs_overview.json diff --git a/hugging_face_tgi/assets/saved_views/error_logs_overview.json b/hugging_face_tgi/assets/saved_views/error_logs_overview.json new file mode 100644 index 0000000000000..28dbaddc8110d --- /dev/null +++ b/hugging_face_tgi/assets/saved_views/error_logs_overview.json @@ -0,0 +1,24 @@ +{ + "name": "Hugging Face TGI Error Logs Overview", + "type": "logs", + "page": "stream", + "query": "source:hugging_face_tgi status:error", + "timerange": { + "interval_ms": 3600000 + }, + "visible_facets": [ + "source", + "host", + "service" + ], + "options": { + "columns": [ + "host", + "service" + ], + "show_date_column": true, + "show_message_column": true, + "message_display": "inline", + "show_timeline": true + } +} diff --git a/hugging_face_tgi/assets/saved_views/logs_overview.json b/hugging_face_tgi/assets/saved_views/logs_overview.json new file mode 100644 index 0000000000000..70d652634e51d --- /dev/null +++ b/hugging_face_tgi/assets/saved_views/logs_overview.json @@ -0,0 +1,24 @@ +{ + "name": "Hugging Face TGI Logs Overview", + "type": "logs", + "page": "stream", + "query": "source:velero", + "timerange": { + "interval_ms": 3600000 + }, + "visible_facets": [ + "source", + "host", + "service" + ], + "options": { + "columns": [ + 
"host", + "service" + ], + "show_date_column": true, + "show_message_column": true, + "message_display": "inline", + "show_timeline": true + } +} diff --git a/hugging_face_tgi/manifest.json b/hugging_face_tgi/manifest.json index 35073d9a29646..15e46b2aeb7ff 100644 --- a/hugging_face_tgi/manifest.json +++ b/hugging_face_tgi/manifest.json @@ -38,7 +38,10 @@ } }, "monitors": {}, - "saved_views": {} + "saved_views": { + "Hugging Face TGI Logs Overview": "assets/saved_views/logs_overview.json", + "Hugging Face TGI Error Logs Overview": "assets/saved_views/error_logs_overview.json" + } }, "author": { "support_email": "help@datadoghq.com", From 739d328acd6b4c2b24ba66af316ad037c3253747 Mon Sep 17 00:00:00 2001 From: dkirov-dd <166512750+dkirov-dd@users.noreply.github.com> Date: Tue, 16 Sep 2025 11:01:55 +0200 Subject: [PATCH 7/9] Update README.md --- hugging_face_tgi/README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/hugging_face_tgi/README.md b/hugging_face_tgi/README.md index 8f5a63c9b28f9..7bdf92f2497d1 100644 --- a/hugging_face_tgi/README.md +++ b/hugging_face_tgi/README.md @@ -112,7 +112,6 @@ If you wish to ingest non JSON TGI logs, use the following logs configuration: name: strip_ansi pattern: "\\x1B\\[[0-9;]*m" replace_placeholder: "" - ``` Need help? Contact [Datadog support][9]. From 05c8387a6936840e7991ad9a14e6f456e4487ac5 Mon Sep 17 00:00:00 2001 From: David Kirov Date: Tue, 16 Sep 2025 11:05:03 +0200 Subject: [PATCH 8/9] Fix saved view source --- hugging_face_tgi/assets/saved_views/logs_overview.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hugging_face_tgi/assets/saved_views/logs_overview.json b/hugging_face_tgi/assets/saved_views/logs_overview.json index 70d652634e51d..8cb089b082d2c 100644 --- a/hugging_face_tgi/assets/saved_views/logs_overview.json +++ b/hugging_face_tgi/assets/saved_views/logs_overview.json @@ -2,7 +2,7 @@ "name": "Hugging Face TGI Logs Overview", "type": "logs", "page": "stream", - "query": "source:velero", + "query": "source:hugging_face_tgi", "timerange": { "interval_ms": 3600000 }, From 9fb0a07ebbf27bebeb6b2adf1a7476b3a2abe147 Mon Sep 17 00:00:00 2001 From: David Kirov Date: Tue, 16 Sep 2025 15:00:47 +0200 Subject: [PATCH 9/9] Update README --- hugging_face_tgi/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hugging_face_tgi/README.md b/hugging_face_tgi/README.md index 7bdf92f2497d1..34cef9bd1dd58 100644 --- a/hugging_face_tgi/README.md +++ b/hugging_face_tgi/README.md @@ -38,7 +38,7 @@ No additional installation is needed on your server. #### Logs -The Hugging Face TGI integration can collect logs from the server container and forward them to Datadog. The TGI server container needs to be started with the environment variable `NO_COLOR=1` and the option `--json-output` for the logs output to be correctly parsed by Datadog. If launching the server in this manner is not possible, the logs ingested by Datadog will likely be malformed, please refer to the troubleshooting section. +The Hugging Face TGI integration can collect logs from the server container and forward them to Datadog. The TGI server container needs to be started with the environment variable `NO_COLOR=1` and the option `--json-output` for the logs output to be correctly parsed by Datadog. After setting these variables, the server must be restarted to enable log ingestion