Skip to content

Commit 41d8d80

Browse files
simonrosenberg, openhands-agent, and xingyaoww
authored
refactor: standardize Observation base class (#929)
Co-authored-by: openhands <[email protected]>
Co-authored-by: Xingyao Wang <[email protected]>
1 parent f10eed2 commit 41d8d80

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

55 files changed

+1037
-883
lines changed

examples/01_standalone_sdk/02_custom_tools.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,10 @@ def __call__(self, action: GrepAction, conversation=None) -> GrepObservation: #
9292
files: set[str] = set()
9393

9494
# grep returns exit code 1 when no matches; treat as empty
95-
if result.output.strip():
96-
for line in result.output.strip().splitlines():
95+
output_text = result.text
96+
97+
if output_text.strip():
98+
for line in output_text.strip().splitlines():
9799
matches.append(line)
98100
# Expect "path:line:content" — take the file part before first ":"
99101
file_path = line.split(":", 1)[0]

openhands-sdk/openhands/sdk/mcp/definition.py

Lines changed: 20 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
"""MCPTool definition and implementation."""
22

33
import json
4-
from collections.abc import Sequence
54
from typing import Any
65

76
import mcp.types
@@ -51,28 +50,23 @@ def to_mcp_arguments(self) -> dict:
5150
class MCPToolObservation(Observation):
5251
"""Observation from MCP tool execution."""
5352

54-
content: list[TextContent | ImageContent] = Field(
55-
default_factory=list,
56-
description="Content returned from the MCP tool converted "
57-
"to LLM Ready TextContent or ImageContent",
58-
)
59-
is_error: bool = Field(
60-
default=False, description="Whether the call resulted in an error"
61-
)
6253
tool_name: str = Field(description="Name of the tool that was called")
6354

6455
@classmethod
6556
def from_call_tool_result(
6657
cls, tool_name: str, result: mcp.types.CallToolResult
6758
) -> "MCPToolObservation":
6859
"""Create an MCPToolObservation from a CallToolResult."""
69-
content: list[mcp.types.ContentBlock] = result.content
70-
convrted_content = []
71-
for block in content:
60+
61+
native_content: list[mcp.types.ContentBlock] = result.content
62+
content: list[TextContent | ImageContent] = [
63+
TextContent(text=f"[Tool '{tool_name}' executed.]")
64+
]
65+
for block in native_content:
7266
if isinstance(block, mcp.types.TextContent):
73-
convrted_content.append(TextContent(text=block.text))
67+
content.append(TextContent(text=block.text))
7468
elif isinstance(block, mcp.types.ImageContent):
75-
convrted_content.append(
69+
content.append(
7670
ImageContent(
7771
image_urls=[f"data:{block.mimeType};base64,{block.data}"],
7872
)
@@ -81,36 +75,32 @@ def from_call_tool_result(
8175
logger.warning(
8276
f"Unsupported MCP content block type: {type(block)}. Ignoring."
8377
)
78+
8479
return cls(
85-
content=convrted_content,
80+
content=content,
8681
is_error=result.isError,
8782
tool_name=tool_name,
8883
)
8984

90-
@property
91-
def to_llm_content(self) -> Sequence[TextContent | ImageContent]:
92-
"""Format the observation for agent display."""
93-
initial_message = f"[Tool '{self.tool_name}' executed.]\n"
94-
if self.is_error:
95-
initial_message += "[An error occurred during execution.]\n"
96-
return [TextContent(text=initial_message)] + self.content
97-
9885
@property
9986
def visualize(self) -> Text:
10087
"""Return Rich Text representation of this observation."""
101-
content = Text()
102-
content.append(f"[MCP Tool '{self.tool_name}' Observation]\n", style="bold")
88+
text = Text()
89+
10390
if self.is_error:
104-
content.append("[Error during execution]\n", style="bold red")
91+
text.append("❌ ", style="red bold")
92+
text.append(self.ERROR_MESSAGE_HEADER, style="bold red")
93+
94+
text.append(f"[MCP Tool '{self.tool_name}' Observation]\n", style="bold")
10595
for block in self.content:
10696
if isinstance(block, TextContent):
10797
# try to see if block.text is a JSON
10898
try:
10999
parsed = json.loads(block.text)
110-
content.append(display_dict(parsed))
100+
text.append(display_dict(parsed))
111101
continue
112102
except (json.JSONDecodeError, TypeError):
113-
content.append(block.text + "\n")
103+
text.append(block.text + "\n")
114104
elif isinstance(block, ImageContent):
115-
content.append(f"[Image with {len(block.image_urls)} URLs]\n")
116-
return content
105+
text.append(f"[Image with {len(block.image_urls)} URLs]\n")
106+
return text

openhands-sdk/openhands/sdk/mcp/tool.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
from litellm import ChatCompletionToolParam
1313
from pydantic import Field, ValidationError
1414

15-
from openhands.sdk.llm import TextContent
1615
from openhands.sdk.logger import get_logger
1716
from openhands.sdk.mcp.client import MCPClient
1817
from openhands.sdk.mcp.definition import MCPToolAction, MCPToolObservation
@@ -69,8 +68,8 @@ async def call_tool(self, action: MCPToolAction) -> MCPToolObservation:
6968
except Exception as e:
7069
error_msg = f"Error calling MCP tool {self.tool_name}: {str(e)}"
7170
logger.error(error_msg, exc_info=True)
72-
return MCPToolObservation(
73-
content=[TextContent(text=error_msg)],
71+
return MCPToolObservation.from_text(
72+
text=error_msg,
7473
is_error=True,
7574
tool_name=self.tool_name,
7675
)
@@ -154,8 +153,8 @@ def __call__(
154153
# Surface validation errors as an observation instead of crashing
155154
error_msg = f"Validation error for MCP tool '{self.name}' args: {e}"
156155
logger.error(error_msg, exc_info=True)
157-
return MCPToolObservation(
158-
content=[TextContent(text=error_msg)],
156+
return MCPToolObservation.from_text(
157+
text=error_msg,
159158
is_error=True,
160159
tool_name=self.name,
161160
)

openhands-sdk/openhands/sdk/tool/builtins/finish.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
from pydantic import Field
55
from rich.text import Text
66

7-
from openhands.sdk.llm.message import ImageContent, TextContent
87
from openhands.sdk.tool.tool import (
98
Action,
109
Observation,
@@ -32,16 +31,15 @@ def visualize(self) -> Text:
3231

3332

3433
class FinishObservation(Observation):
35-
message: str = Field(description="Final message sent to the user.")
36-
37-
@property
38-
def to_llm_content(self) -> Sequence[TextContent | ImageContent]:
39-
return [TextContent(text=self.message)]
34+
"""
35+
Observation returned after finishing a task.
36+
The FinishAction itself contains the message sent to the user so no
37+
extra fields are needed here.
38+
"""
4039

4140
@property
4241
def visualize(self) -> Text:
43-
"""Return Rich Text representation - empty since action shows the message."""
44-
# Don't duplicate the finish message display - action already shows it
42+
"""Return an empty Text representation since the message is in the action."""
4543
return Text()
4644

4745

@@ -65,7 +63,7 @@ def __call__(
6563
action: FinishAction,
6664
conversation: "BaseConversation | None" = None, # noqa: ARG002
6765
) -> FinishObservation:
68-
return FinishObservation(message=action.message)
66+
return FinishObservation.from_text(text=action.message)
6967

7068

7169
class FinishTool(ToolDefinition[FinishAction, FinishObservation]):

openhands-sdk/openhands/sdk/tool/builtins/think.py

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
from pydantic import Field
55
from rich.text import Text
66

7-
from openhands.sdk.llm.message import ImageContent, TextContent
87
from openhands.sdk.tool.tool import (
98
Action,
109
Observation,
@@ -46,20 +45,15 @@ def visualize(self) -> Text:
4645

4746

4847
class ThinkObservation(Observation):
49-
"""Observation returned after logging a thought."""
50-
51-
content: str = Field(
52-
default="Your thought has been logged.", description="Confirmation message."
53-
)
54-
55-
@property
56-
def to_llm_content(self) -> Sequence[TextContent | ImageContent]:
57-
return [TextContent(text=self.content)]
48+
"""
49+
Observation returned after logging a thought.
50+
The ThinkAction itself contains the thought logged so no extra
51+
fields are needed here.
52+
"""
5853

5954
@property
6055
def visualize(self) -> Text:
61-
"""Return Rich Text representation - empty since action shows the thought."""
62-
# Don't duplicate the thought display - action already shows it
56+
"""Return an empty Text representation since the thought is in the action."""
6357
return Text()
6458

6559

@@ -81,7 +75,7 @@ def __call__(
8175
_: ThinkAction,
8276
conversation: "BaseConversation | None" = None, # noqa: ARG002
8377
) -> ThinkObservation:
84-
return ThinkObservation()
78+
return ThinkObservation.from_text(text="Your thought has been logged.")
8579

8680

8781
class ThinkTool(ToolDefinition[ThinkAction, ThinkObservation]):

openhands-sdk/openhands/sdk/tool/schema.py

Lines changed: 75 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
from abc import ABC, abstractmethod
1+
from abc import ABC
22
from collections.abc import Sequence
3-
from typing import Any, ClassVar, TypeVar
3+
from typing import TYPE_CHECKING, Any, ClassVar, TypeVar
44

55
from pydantic import ConfigDict, Field, create_model
66
from rich.text import Text
@@ -13,6 +13,9 @@
1313
from openhands.sdk.utils.visualize import display_dict
1414

1515

16+
if TYPE_CHECKING:
17+
from typing import Self
18+
1619
S = TypeVar("S", bound="Schema")
1720

1821

@@ -190,23 +193,84 @@ def visualize(self) -> Text:
190193
class Observation(Schema, ABC):
191194
"""Base schema for output observation."""
192195

196+
ERROR_MESSAGE_HEADER: ClassVar[str] = "[An error occurred during execution.]\n"
197+
198+
content: list[TextContent | ImageContent] = Field(
199+
default_factory=list,
200+
description=(
201+
"Content returned from the tool as a list of "
202+
"TextContent/ImageContent objects. "
203+
"When there is an error, it should be written in this field."
204+
),
205+
)
206+
is_error: bool = Field(
207+
default=False, description="Whether the observation indicates an error"
208+
)
209+
210+
@classmethod
211+
def from_text(
212+
cls,
213+
text: str,
214+
is_error: bool = False,
215+
**kwargs: Any,
216+
) -> "Self":
217+
"""Utility to create an Observation from a simple text string.
218+
219+
Args:
220+
text: The text content to include in the observation.
221+
is_error: Whether this observation represents an error.
222+
**kwargs: Additional fields for the observation subclass.
223+
224+
Returns:
225+
An Observation instance with the text wrapped in a TextContent.
226+
"""
227+
return cls(content=[TextContent(text=text)], is_error=is_error, **kwargs)
228+
229+
@property
230+
def text(self) -> str:
231+
"""Extract all text content from the observation.
232+
233+
Returns:
234+
Concatenated text from all TextContent items in content.
235+
"""
236+
return "".join(
237+
item.text for item in self.content if isinstance(item, TextContent)
238+
)
239+
193240
@property
194-
@abstractmethod
195241
def to_llm_content(self) -> Sequence[TextContent | ImageContent]:
196-
"""Get the observation string to show to the agent."""
242+
"""
243+
Default content formatting for converting observation to LLM readable content.
244+
Subclasses can override to provide richer content (e.g., images, diffs).
245+
"""
246+
llm_content: list[TextContent | ImageContent] = []
247+
248+
# If is_error is true, prepend error message
249+
if self.is_error:
250+
llm_content.append(TextContent(text=self.ERROR_MESSAGE_HEADER))
251+
252+
# Add content (now always a list)
253+
llm_content.extend(self.content)
254+
255+
return llm_content
197256

198257
@property
199258
def visualize(self) -> Text:
200-
"""Return Rich Text representation of this action.
259+
"""Return Rich Text representation of this observation.
201260
202-
This method can be overridden by subclasses to customize visualization.
203-
The base implementation displays all action fields systematically.
261+
Subclasses can override for custom visualization; by default we show the
262+
same text that would be sent to the LLM.
204263
"""
205-
content = Text()
264+
text = Text()
265+
266+
if self.is_error:
267+
text.append("❌ ", style="red bold")
268+
text.append(self.ERROR_MESSAGE_HEADER, style="bold red")
269+
206270
text_parts = content_to_str(self.to_llm_content)
207271
if text_parts:
208272
full_content = "".join(text_parts)
209-
content.append(full_content)
273+
text.append(full_content)
210274
else:
211-
content.append("[no text content]")
212-
return content
275+
text.append("[no text content]")
276+
return text

openhands-tools/openhands/tools/browser_use/definition.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,20 +28,24 @@
2828
class BrowserObservation(Observation):
2929
"""Base observation for browser operations."""
3030

31-
output: str = Field(description="The output message from the browser operation")
32-
error: str | None = Field(default=None, description="Error message if any")
3331
screenshot_data: str | None = Field(
3432
default=None, description="Base64 screenshot data if available"
3533
)
3634

3735
@property
3836
def to_llm_content(self) -> Sequence[TextContent | ImageContent]:
39-
if self.error:
40-
return [TextContent(text=f"Error: {self.error}")]
37+
llm_content: list[TextContent | ImageContent] = []
4138

42-
content: list[TextContent | ImageContent] = [
43-
TextContent(text=maybe_truncate(self.output, MAX_BROWSER_OUTPUT_SIZE))
44-
]
39+
# If is_error is true, prepend error message
40+
if self.is_error:
41+
llm_content.append(TextContent(text=self.ERROR_MESSAGE_HEADER))
42+
43+
# Get text content and truncate if needed
44+
content_text = self.text
45+
if content_text:
46+
llm_content.append(
47+
TextContent(text=maybe_truncate(content_text, MAX_BROWSER_OUTPUT_SIZE))
48+
)
4549

4650
if self.screenshot_data:
4751
mime_type = "image/png"
@@ -55,9 +59,9 @@ def to_llm_content(self) -> Sequence[TextContent | ImageContent]:
5559
mime_type = "image/webp"
5660
# Convert base64 to data URL format for ImageContent
5761
data_url = f"data:{mime_type};base64,{self.screenshot_data}"
58-
content.append(ImageContent(image_urls=[data_url]))
62+
llm_content.append(ImageContent(image_urls=[data_url]))
5963

60-
return content
64+
return llm_content
6165

6266

6367
# ============================================

0 commit comments

Comments
 (0)