Skip to content

Commit 398b7c4

Browse files
authored
feat(anthropic): Add proper tool calling data to Anthropic integration (#4769)
- Format the LLM response (`gen_ai.response.text`) correctly: record only the text that was actually returned instead of the JSON-serialized content blocks. - Add tool calls made by the model (`gen_ai.response.tool_calls`) to the LLM spans. - Add the results of tool calls to the request messages (`gen_ai.request.messages`). Before: <img width="1120" height="570" alt="Screenshot 2025-09-12 at 10 43 32" src="https://github.com/user-attachments/assets/3c9aa656-b7d8-4520-9220-87dad45e49fb" /> After: <img width="1120" height="690" alt="Screenshot 2025-09-12 at 10 45 11" src="https://github.com/user-attachments/assets/3d33b27a-f1aa-4467-b2f3-cb16ce1de31e" />
1 parent b19e086 commit 398b7c4

File tree

2 files changed

+61
-35
lines changed

2 files changed

+61
-35
lines changed

sentry_sdk/integrations/anthropic.py

Lines changed: 45 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
from functools import wraps
2-
import json
32
from typing import TYPE_CHECKING
43

54
import sentry_sdk
@@ -117,8 +116,29 @@ def _set_input_data(span, kwargs, integration):
117116
and should_send_default_pii()
118117
and integration.include_prompts
119118
):
119+
normalized_messages = []
120+
for message in messages:
121+
if (
122+
message.get("role") == "user"
123+
and "content" in message
124+
and isinstance(message["content"], (list, tuple))
125+
):
126+
for item in message["content"]:
127+
if item.get("type") == "tool_result":
128+
normalized_messages.append(
129+
{
130+
"role": "tool",
131+
"content": {
132+
"tool_use_id": item.get("tool_use_id"),
133+
"output": item.get("content"),
134+
},
135+
}
136+
)
137+
else:
138+
normalized_messages.append(message)
139+
120140
set_data_normalized(
121-
span, SPANDATA.GEN_AI_REQUEST_MESSAGES, safe_serialize(messages)
141+
span, SPANDATA.GEN_AI_REQUEST_MESSAGES, normalized_messages, unpack=False
122142
)
123143

124144
set_data_normalized(
@@ -159,21 +179,36 @@ def _set_output_data(
159179
Set output data for the span based on the AI response."""
160180
span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, model)
161181
if should_send_default_pii() and integration.include_prompts:
162-
set_data_normalized(
163-
span,
164-
SPANDATA.GEN_AI_RESPONSE_TEXT,
165-
json.dumps(content_blocks),
166-
unpack=False,
167-
)
182+
output_messages = {
183+
"response": [],
184+
"tool": [],
185+
} # type: (dict[str, list[Any]])
186+
187+
for output in content_blocks:
188+
if output["type"] == "text":
189+
output_messages["response"].append(output["text"])
190+
elif output["type"] == "tool_use":
191+
output_messages["tool"].append(output)
192+
193+
if len(output_messages["tool"]) > 0:
194+
set_data_normalized(
195+
span,
196+
SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS,
197+
output_messages["tool"],
198+
unpack=False,
199+
)
200+
201+
if len(output_messages["response"]) > 0:
202+
set_data_normalized(
203+
span, SPANDATA.GEN_AI_RESPONSE_TEXT, output_messages["response"]
204+
)
168205

169206
record_token_usage(
170207
span,
171208
input_tokens=input_tokens,
172209
output_tokens=output_tokens,
173210
)
174211

175-
# TODO: GEN_AI_RESPONSE_TOOL_CALLS ?
176-
177212
if finish_span:
178213
span.__exit__(None, None, None)
179214

tests/integrations/anthropic/test_anthropic.py

Lines changed: 16 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1+
import pytest
12
from unittest import mock
23

3-
44
try:
55
from unittest.mock import AsyncMock
66
except ImportError:
@@ -10,7 +10,6 @@ async def __call__(self, *args, **kwargs):
1010
return super(AsyncMock, self).__call__(*args, **kwargs)
1111

1212

13-
import pytest
1413
from anthropic import Anthropic, AnthropicError, AsyncAnthropic, AsyncStream, Stream
1514
from anthropic.types import MessageDeltaUsage, TextDelta, Usage
1615
from anthropic.types.content_block_delta_event import ContentBlockDeltaEvent
@@ -20,9 +19,6 @@ async def __call__(self, *args, **kwargs):
2019
from anthropic.types.message_delta_event import MessageDeltaEvent
2120
from anthropic.types.message_start_event import MessageStartEvent
2221

23-
from sentry_sdk.integrations.anthropic import _set_output_data, _collect_ai_data
24-
from sentry_sdk.utils import package_version
25-
2622
try:
2723
from anthropic.types import InputJSONDelta
2824
except ImportError:
@@ -46,9 +42,16 @@ async def __call__(self, *args, **kwargs):
4642

4743
from sentry_sdk import start_transaction, start_span
4844
from sentry_sdk.consts import OP, SPANDATA
49-
from sentry_sdk.integrations.anthropic import AnthropicIntegration
45+
from sentry_sdk.integrations.anthropic import (
46+
AnthropicIntegration,
47+
_set_output_data,
48+
_collect_ai_data,
49+
)
50+
from sentry_sdk.utils import package_version
51+
5052

5153
ANTHROPIC_VERSION = package_version("anthropic")
54+
5255
EXAMPLE_MESSAGE = Message(
5356
id="id",
5457
model="model",
@@ -121,10 +124,7 @@ def test_nonstreaming_create_message(
121124
span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
122125
== '[{"role": "user", "content": "Hello, Claude"}]'
123126
)
124-
assert (
125-
span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]
126-
== '[{"text": "Hi, I\'m Claude.", "type": "text"}]'
127-
)
127+
assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude."
128128
else:
129129
assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"]
130130
assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"]
@@ -193,10 +193,7 @@ async def test_nonstreaming_create_message_async(
193193
span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
194194
== '[{"role": "user", "content": "Hello, Claude"}]'
195195
)
196-
assert (
197-
span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]
198-
== '[{"text": "Hi, I\'m Claude.", "type": "text"}]'
199-
)
196+
assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude."
200197
else:
201198
assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"]
202199
assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"]
@@ -296,10 +293,7 @@ def test_streaming_create_message(
296293
span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
297294
== '[{"role": "user", "content": "Hello, Claude"}]'
298295
)
299-
assert (
300-
span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]
301-
== '[{"text": "Hi! I\'m Claude!", "type": "text"}]'
302-
)
296+
assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!"
303297

304298
else:
305299
assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"]
@@ -403,10 +397,7 @@ async def test_streaming_create_message_async(
403397
span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
404398
== '[{"role": "user", "content": "Hello, Claude"}]'
405399
)
406-
assert (
407-
span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]
408-
== '[{"text": "Hi! I\'m Claude!", "type": "text"}]'
409-
)
400+
assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!"
410401

411402
else:
412403
assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"]
@@ -539,7 +530,7 @@ def test_streaming_create_message_with_input_json_delta(
539530
)
540531
assert (
541532
span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]
542-
== '[{"text": "{\'location\': \'San Francisco, CA\'}", "type": "text"}]'
533+
== "{'location': 'San Francisco, CA'}"
543534
)
544535
else:
545536
assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"]
@@ -679,7 +670,7 @@ async def test_streaming_create_message_with_input_json_delta_async(
679670
)
680671
assert (
681672
span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]
682-
== '[{"text": "{\'location\': \'San Francisco, CA\'}", "type": "text"}]'
673+
== "{'location': 'San Francisco, CA'}"
683674
)
684675

685676
else:
@@ -835,7 +826,7 @@ def test_set_output_data_with_input_json_delta(sentry_init):
835826

836827
assert (
837828
span._data.get(SPANDATA.GEN_AI_RESPONSE_TEXT)
838-
== "[{\"text\": \"{'test': 'data','more': 'json'}\", \"type\": \"text\"}]"
829+
== "{'test': 'data','more': 'json'}"
839830
)
840831
assert span._data.get(SPANDATA.GEN_AI_USAGE_INPUT_TOKENS) == 10
841832
assert span._data.get(SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS) == 20

0 commit comments

Comments
 (0)