Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions holmes/core/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,25 @@ def as_streaming_tool_result_response(self):

def format_tool_result_data(tool_result: StructuredToolResult) -> str:
tool_response = tool_result.data
if tool_result.llm_data:
# Some tools can return data dedicated to the LLM (`llm_data`), e.g. a reformatted or summarized version of `data`.
# That LLM-dedicated data is what ends up in the conversation history.
# HACK: swap only the inner `data` field and keep the structured output around it, because some models require access to it. For example, Prometheus graphs require the `random_key` to generate a usable graph.
try:
if tool_result.data and isinstance(tool_result.data, str):
data_with_random_key = json.loads(tool_result.data)
if (
data_with_random_key
and data_with_random_key.get("random_key")
and data_with_random_key.get("data")
):
tool_result = tool_result.model_copy()
data_with_random_key["data"] = tool_result.llm_data
tool_result.data = data_with_random_key
tool_result.llm_data = None
except Exception:
pass

if isinstance(tool_result.data, str):
tool_response = tool_result.data
else:
Expand Down
9 changes: 8 additions & 1 deletion holmes/core/tool_calling_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -679,7 +679,7 @@ def _get_tool_call_result(

tool = self.tool_executor.get_tool_by_name(tool_name)

return ToolCallResult(
tool_call_result = ToolCallResult(
tool_call_id=tool_call_id,
tool_name=tool_name,
description=str(tool.get_parameterized_one_liner(tool_params))
Expand All @@ -688,6 +688,12 @@ def _get_tool_call_result(
result=tool_response,
)

message = tool_call_result.as_tool_call_message()

token_count = self.llm.count_tokens_for_message(messages=[message])
tool_call_result.size = token_count
return tool_call_result

@staticmethod
def _log_tool_call_result(tool_span, tool_call_result: ToolCallResult):
tool_span.set_attributes(name=tool_call_result.tool_name)
Expand All @@ -698,6 +704,7 @@ def _log_tool_call_result(tool_span, tool_call_result: ToolCallResult):
metadata={
"status": tool_call_result.result.status,
"description": tool_call_result.description,
"token_count": tool_call_result.size,
},
)

Expand Down
1 change: 1 addition & 0 deletions holmes/core/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ class StructuredToolResult(BaseModel):
error: Optional[str] = None
return_code: Optional[int] = None
data: Optional[Any] = None
llm_data: Optional[str] = None
url: Optional[str] = None
invocation: Optional[str] = None
params: Optional[Dict] = None
Expand Down
16 changes: 9 additions & 7 deletions holmes/core/tools_utils/tool_context_window_limiter.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,25 @@ def prevent_overly_big_tool_response(tool_call_result: ToolCallResult, llm: LLM)
and 0 < TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT
and TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT <= 100
):
message = tool_call_result.as_tool_call_message()

messages_token = llm.count_tokens_for_message(messages=[message])
if not tool_call_result.size:
message = tool_call_result.as_tool_call_message()
messages_token = llm.count_tokens_for_message(messages=[message])
tool_call_result.size = messages_token
context_window_size = llm.get_context_window_size()
max_tokens_allowed: int = int(
context_window_size * TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT // 100
)

if messages_token > max_tokens_allowed:
if tool_call_result.size > max_tokens_allowed:
relative_pct = (
(messages_token - max_tokens_allowed) / messages_token
(tool_call_result.size - max_tokens_allowed) / tool_call_result.size
) * 100
error_message = f"The tool call result is too large to return: {messages_token} tokens.\nThe maximum allowed tokens is {max_tokens_allowed} which is {format(relative_pct, '.1f')}% smaller.\nInstructions for the LLM: try to repeat the query but proactively narrow down the result so that the tool answer fits within the allowed number of tokens."
error_message = f"The tool call result is too large to return: {tool_call_result.size} tokens.\nThe maximum allowed tokens is {max_tokens_allowed} which is {format(relative_pct, '.1f')}% smaller.\nInstructions for the LLM: try to repeat the query but proactively narrow down the result so that the tool answer fits within the allowed number of tokens."
tool_call_result.result.status = StructuredToolResultStatus.ERROR
tool_call_result.result.data = None
tool_call_result.result.error = error_message

sentry_helper.capture_toolcall_contains_too_many_tokens(
tool_call_result, messages_token, max_tokens_allowed
tool_call_result, tool_call_result.size, max_tokens_allowed
)
tool_call_result.size = None
56 changes: 49 additions & 7 deletions holmes/plugins/toolsets/newrelic/newrelic.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@
)
from pydantic import BaseModel
from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
from holmes.plugins.toolsets.prometheus.data_compression import (
simplify_prometheus_metric_object,
compact_metrics,
)
from holmes.plugins.toolsets.prometheus.model import PromResponse
from holmes.plugins.toolsets.utils import toolset_name_for_one_liner
from holmes.plugins.toolsets.newrelic.new_relic_api import NewRelicAPI
Expand Down Expand Up @@ -76,14 +80,46 @@ def __init__(self, toolset: "NewRelicToolset"):
)
self._toolset = toolset

def format_metrics(
def compact_metrics_data(self, response: "PromResponse") -> Optional[str]:
    """Build a compacted rendering of ``response`` intended for the LLM.

    The metrics in ``response.data.result`` are simplified and compacted, and
    the JSON-serialized size of the compacted output is compared with the
    JSON-serialized size of ``response.to_json()``.

    Args:
        response: Prometheus-style response holding the metrics to compact.

    Returns:
        The compacted data when compaction is enabled on the toolset and the
        size reduction (in percent) is strictly greater than the toolset's
        ``compact_metrics_minimum_ratio``; otherwise ``None``, meaning the
        original data should be used. ``None`` is also returned when anything
        fails: compaction is best-effort and never raises.
    """
    llm_data: Optional[str] = None
    try:
        if not self._toolset.compact_metrics:
            # Compaction is switched off in the toolset configuration, so the
            # response is left untouched and no work is done.
            return None

        metrics = [
            simplify_prometheus_metric_object(metric, labels_to_remove=set())
            for metric in response.data.result
        ]

        # NOTE(review): presumably compact_metrics() returns a str, matching
        # the Optional[str] return annotation — confirm against its definition.
        compacted_data = compact_metrics(metrics)
        original_size = len(json.dumps(response.to_json()))
        compacted_size = len(json.dumps(compacted_data))
        # Percentage saved relative to the original; guarded against a
        # zero-length original to avoid a division by zero.
        compaction_ratio = (
            (1 - compacted_size / original_size) * 100 if original_size > 0 else 0
        )

        if compaction_ratio > self._toolset.compact_metrics_minimum_ratio:
            # below this amount it's likely not worth mutating the response
            llm_data = compacted_data
            logging.debug(
                f"Compressed Newrelic metrics: {original_size:,} → {compacted_size:,} chars "
                f"({compaction_ratio:.1f}% reduction)"
            )
        else:
            logging.debug(
                f"Compressed Newrelic metrics: {original_size:,} → {compacted_size:,} chars "
                f"({compaction_ratio:.1f}% reduction). Original data will be used instead."
            )
    except Exception:
        # Best-effort: a compaction failure must not fail the tool call.
        logging.warning("Failed to compress newrelic data", exc_info=True)

    return llm_data

def to_prometheus_records(
self,
records: List[Dict[str, Any]],
params: Optional[Dict[str, Any]] = None,
begin_key: str = "beginTimeSeconds",
end_key: str = "endTimeSeconds",
facet_key: str = "facet",
) -> Dict[str, Any]:
) -> PromResponse:
resp = PromResponse.from_newrelic_records(
records=records,
tool_name=self.name,
Expand All @@ -92,7 +128,7 @@ def format_metrics(
end_key=end_key,
facet_key=facet_key,
)
return resp.to_json()
return resp

def _invoke(
self, params: dict, user_approved: bool = False
Expand All @@ -108,7 +144,6 @@ def _invoke(

query = params["query"]
result: List[Dict[str, Any]] = api.execute_nrql_query(query)

qtype = params.get("query_type", "").lower()

if qtype == "traces":
Expand Down Expand Up @@ -137,12 +172,13 @@ def _invoke(
if qtype == "metrics" or "timeseries" in query.lower():
enriched_params = dict(params)
enriched_params["query"] = query
return_result = self.format_metrics(result, params=enriched_params)
if len(return_result.get("data", {}).get("results", [])):
return_result = result # type: ignore[assignment]
prom_data = self.to_prometheus_records(result, params=enriched_params)

return_result = prom_data.to_json()
return StructuredToolResult(
status=StructuredToolResultStatus.SUCCESS,
data=json.dumps(return_result, indent=2),
llm_data=self.compact_metrics_data(prom_data),
params=params,
)

Expand Down Expand Up @@ -205,12 +241,16 @@ class NewrelicConfig(BaseModel):
nr_api_key: Optional[str] = None
nr_account_id: Optional[str] = None
is_eu_datacenter: Optional[bool] = False
compact_metrics: bool = True
compact_metrics_minimum_ratio: int = 30 # minimum size reduction, in percent, that compaction must exceed for the compacted data to be used (30 means a 30% reduction)


class NewRelicToolset(Toolset):
nr_api_key: Optional[str] = None
nr_account_id: Optional[str] = None
is_eu_datacenter: bool = False
compact_metrics: bool = True
compact_metrics_minimum_ratio: int = 30

def __init__(self):
super().__init__(
Expand Down Expand Up @@ -241,6 +281,8 @@ def prerequisites_callable(
self.nr_account_id = nr_config.nr_account_id
self.nr_api_key = nr_config.nr_api_key
self.is_eu_datacenter = nr_config.is_eu_datacenter or False
self.compact_metrics = nr_config.compact_metrics
self.compact_metrics_minimum_ratio = nr_config.compact_metrics_minimum_ratio

if not self.nr_account_id or not self.nr_api_key:
return False, "New Relic account ID or API key is missing"
Expand Down
Loading
Loading