OpenHands
diff --git a/examples/01_standalone_sdk/02_custom_tools.py b/examples/01_standalone_sdk/02_custom_tools.py
Lines changed: 4 additions & 2 deletions
diff --git a/openhands-sdk/openhands/sdk/mcp/definition.py b/openhands-sdk/openhands/sdk/mcp/definition.py
Lines changed: 20 additions & 30 deletions
diff --git a/openhands-sdk/openhands/sdk/mcp/tool.py b/openhands-sdk/openhands/sdk/mcp/tool.py
Lines changed: 4 additions & 5 deletions
diff --git a/openhands-sdk/openhands/sdk/tool/builtins/finish.py b/openhands-sdk/openhands/sdk/tool/builtins/finish.py
Lines changed: 7 additions & 9 deletions
diff --git a/openhands-sdk/openhands/sdk/tool/builtins/think.py b/openhands-sdk/openhands/sdk/tool/builtins/think.py
Lines changed: 7 additions & 13 deletions
diff --git a/openhands-sdk/openhands/sdk/tool/schema.py b/openhands-sdk/openhands/sdk/tool/schema.py
Lines changed: 75 additions & 11 deletions
diff --git a/openhands-tools/openhands/tools/browser_use/definition.py b/openhands-tools/openhands/tools/browser_use/definition.py
Lines changed: 13 additions & 9 deletions
@@ -92,8 +92,10 @@ def __call__(self, action: GrepAction, conversation=None) -> GrepObservation:  #
         files: set[str] = set()
 
         # grep returns exit code 1 when no matches; treat as empty
-        if result.output.strip():
-            for line in result.output.strip().splitlines():
+        output_text = result.text
+
+        if output_text.strip():
+            for line in output_text.strip().splitlines():
                 matches.append(line)
                 # Expect "path:line:content" — take the file part before first ":"
                 file_path = line.split(":", 1)[0]
 
@@ -1,7 +1,6 @@
 """MCPTool definition and implementation."""
 
 import json
-from collections.abc import Sequence
 from typing import Any
 
 import mcp.types
@@ -51,28 +50,23 @@ def to_mcp_arguments(self) -> dict:
 class MCPToolObservation(Observation):
     """Observation from MCP tool execution."""
 
-    content: list[TextContent | ImageContent] = Field(
-        default_factory=list,
-        description="Content returned from the MCP tool converted "
-        "to LLM Ready TextContent or ImageContent",
-    )
-    is_error: bool = Field(
-        default=False, description="Whether the call resulted in an error"
-    )
     tool_name: str = Field(description="Name of the tool that was called")
 
     @classmethod
     def from_call_tool_result(
         cls, tool_name: str, result: mcp.types.CallToolResult
     ) -> "MCPToolObservation":
         """Create an MCPToolObservation from a CallToolResult."""
-        content: list[mcp.types.ContentBlock] = result.content
-        convrted_content = []
-        for block in content:
+
+        native_content: list[mcp.types.ContentBlock] = result.content
+        content: list[TextContent | ImageContent] = [
+            TextContent(text=f"[Tool '{tool_name}' executed.]")
+        ]
+        for block in native_content:
             if isinstance(block, mcp.types.TextContent):
-                convrted_content.append(TextContent(text=block.text))
+                content.append(TextContent(text=block.text))
             elif isinstance(block, mcp.types.ImageContent):
-                convrted_content.append(
+                content.append(
                     ImageContent(
                         image_urls=[f"data:{block.mimeType};base64,{block.data}"],
                     )
@@ -81,36 +75,32 @@ def from_call_tool_result(
                 logger.warning(
                     f"Unsupported MCP content block type: {type(block)}. Ignoring."
                 )
+
         return cls(
-            content=convrted_content,
+            content=content,
             is_error=result.isError,
             tool_name=tool_name,
         )
 
-    @property
-    def to_llm_content(self) -> Sequence[TextContent | ImageContent]:
-        """Format the observation for agent display."""
-        initial_message = f"[Tool '{self.tool_name}' executed.]\n"
-        if self.is_error:
-            initial_message += "[An error occurred during execution.]\n"
-        return [TextContent(text=initial_message)] + self.content
-
     @property
     def visualize(self) -> Text:
         """Return Rich Text representation of this observation."""
-        content = Text()
-        content.append(f"[MCP Tool '{self.tool_name}' Observation]\n", style="bold")
+        text = Text()
+
         if self.is_error:
-            content.append("[Error during execution]\n", style="bold red")
+            text.append("❌ ", style="red bold")
+            text.append(self.ERROR_MESSAGE_HEADER, style="bold red")
+
+        text.append(f"[MCP Tool '{self.tool_name}' Observation]\n", style="bold")
         for block in self.content:
             if isinstance(block, TextContent):
                 # try to see if block.text is a JSON
                 try:
                     parsed = json.loads(block.text)
-                    content.append(display_dict(parsed))
+                    text.append(display_dict(parsed))
                     continue
                 except (json.JSONDecodeError, TypeError):
-                    content.append(block.text + "\n")
+                    text.append(block.text + "\n")
             elif isinstance(block, ImageContent):
-                content.append(f"[Image with {len(block.image_urls)} URLs]\n")
-        return content
+                text.append(f"[Image with {len(block.image_urls)} URLs]\n")
+        return text
@@ -12,7 +12,6 @@
 from litellm import ChatCompletionToolParam
 from pydantic import Field, ValidationError
 
-from openhands.sdk.llm import TextContent
 from openhands.sdk.logger import get_logger
 from openhands.sdk.mcp.client import MCPClient
 from openhands.sdk.mcp.definition import MCPToolAction, MCPToolObservation
@@ -69,8 +68,8 @@ async def call_tool(self, action: MCPToolAction) -> MCPToolObservation:
             except Exception as e:
                 error_msg = f"Error calling MCP tool {self.tool_name}: {str(e)}"
                 logger.error(error_msg, exc_info=True)
-                return MCPToolObservation(
-                    content=[TextContent(text=error_msg)],
+                return MCPToolObservation.from_text(
+                    text=error_msg,
                     is_error=True,
                     tool_name=self.tool_name,
                 )
@@ -154,8 +153,8 @@ def __call__(
             # Surface validation errors as an observation instead of crashing
             error_msg = f"Validation error for MCP tool '{self.name}' args: {e}"
             logger.error(error_msg, exc_info=True)
-            return MCPToolObservation(
-                content=[TextContent(text=error_msg)],
+            return MCPToolObservation.from_text(
+                text=error_msg,
                 is_error=True,
                 tool_name=self.name,
             )
 
@@ -4,7 +4,6 @@
 from pydantic import Field
 from rich.text import Text
 
-from openhands.sdk.llm.message import ImageContent, TextContent
 from openhands.sdk.tool.tool import (
     Action,
     Observation,
@@ -32,16 +31,15 @@ def visualize(self) -> Text:
 
 
 class FinishObservation(Observation):
-    message: str = Field(description="Final message sent to the user.")
-
-    @property
-    def to_llm_content(self) -> Sequence[TextContent | ImageContent]:
-        return [TextContent(text=self.message)]
+    """
+    Observation returned after finishing a task.
+    The FinishAction itself contains the message sent to the user so no
+    extra fields are needed here.
+    """
 
     @property
     def visualize(self) -> Text:
-        """Return Rich Text representation - empty since action shows the message."""
-        # Don't duplicate the finish message display - action already shows it
+        """Return an empty Text representation since the message is in the action."""
         return Text()
 
 
@@ -65,7 +63,7 @@ def __call__(
         action: FinishAction,
         conversation: "BaseConversation | None" = None,  # noqa: ARG002
     ) -> FinishObservation:
-        return FinishObservation(message=action.message)
+        return FinishObservation.from_text(text=action.message)
 
 
 class FinishTool(ToolDefinition[FinishAction, FinishObservation]):
 
@@ -4,7 +4,6 @@
 from pydantic import Field
 from rich.text import Text
 
-from openhands.sdk.llm.message import ImageContent, TextContent
 from openhands.sdk.tool.tool import (
     Action,
     Observation,
@@ -46,20 +45,15 @@ def visualize(self) -> Text:
 
 
 class ThinkObservation(Observation):
-    """Observation returned after logging a thought."""
-
-    content: str = Field(
-        default="Your thought has been logged.", description="Confirmation message."
-    )
-
-    @property
-    def to_llm_content(self) -> Sequence[TextContent | ImageContent]:
-        return [TextContent(text=self.content)]
+    """
+    Observation returned after logging a thought.
+    The ThinkAction itself contains the thought logged so no extra
+    fields are needed here.
+    """
 
     @property
     def visualize(self) -> Text:
-        """Return Rich Text representation - empty since action shows the thought."""
-        # Don't duplicate the thought display - action already shows it
+        """Return an empty Text representation since the thought is in the action."""
         return Text()
 
 
@@ -81,7 +75,7 @@ def __call__(
         _: ThinkAction,
         conversation: "BaseConversation | None" = None,  # noqa: ARG002
     ) -> ThinkObservation:
-        return ThinkObservation()
+        return ThinkObservation.from_text(text="Your thought has been logged.")
 
 
 class ThinkTool(ToolDefinition[ThinkAction, ThinkObservation]):
 
@@ -1,6 +1,6 @@
-from abc import ABC, abstractmethod
+from abc import ABC
 from collections.abc import Sequence
-from typing import Any, ClassVar, TypeVar
+from typing import TYPE_CHECKING, Any, ClassVar, TypeVar
 
 from pydantic import ConfigDict, Field, create_model
 from rich.text import Text
@@ -13,6 +13,9 @@
 from openhands.sdk.utils.visualize import display_dict
 
 
+if TYPE_CHECKING:
+    from typing import Self
+
 S = TypeVar("S", bound="Schema")
 
 
@@ -190,23 +193,84 @@ def visualize(self) -> Text:
 class Observation(Schema, ABC):
     """Base schema for output observation."""
 
+    ERROR_MESSAGE_HEADER: ClassVar[str] = "[An error occurred during execution.]\n"
+
+    content: list[TextContent | ImageContent] = Field(
+        default_factory=list,
+        description=(
+            "Content returned from the tool as a list of "
+            "TextContent/ImageContent objects. "
+            "When there is an error, it should be written in this field."
+        ),
+    )
+    is_error: bool = Field(
+        default=False, description="Whether the observation indicates an error"
+    )
+
+    @classmethod
+    def from_text(
+        cls,
+        text: str,
+        is_error: bool = False,
+        **kwargs: Any,
+    ) -> "Self":
+        """Utility to create an Observation from a simple text string.
+
+        Args:
+            text: The text content to include in the observation.
+            is_error: Whether this observation represents an error.
+            **kwargs: Additional fields for the observation subclass.
+
+        Returns:
+            An Observation instance with the text wrapped in a TextContent.
+        """
+        return cls(content=[TextContent(text=text)], is_error=is_error, **kwargs)
+
+    @property
+    def text(self) -> str:
+        """Extract all text content from the observation.
+
+        Returns:
+            Concatenated text from all TextContent items in content.
+        """
+        return "".join(
+            item.text for item in self.content if isinstance(item, TextContent)
+        )
+
     @property
-    @abstractmethod
     def to_llm_content(self) -> Sequence[TextContent | ImageContent]:
-        """Get the observation string to show to the agent."""
+        """
+        Default content formatting for converting observation to LLM readable content.
+        Subclasses can override to provide richer content (e.g., images, diffs).
+        """
+        llm_content: list[TextContent | ImageContent] = []
+
+        # If is_error is true, prepend error message
+        if self.is_error:
+            llm_content.append(TextContent(text=self.ERROR_MESSAGE_HEADER))
+
+        # Add content (now always a list)
+        llm_content.extend(self.content)
+
+        return llm_content
 
     @property
     def visualize(self) -> Text:
-        """Return Rich Text representation of this action.
+        """Return Rich Text representation of this observation.
 
-        This method can be overridden by subclasses to customize visualization.
-        The base implementation displays all action fields systematically.
+        Subclasses can override for custom visualization; by default we show the
+        same text that would be sent to the LLM.
         """
-        content = Text()
+        text = Text()
+
+        if self.is_error:
+            text.append("❌ ", style="red bold")
+            text.append(self.ERROR_MESSAGE_HEADER, style="bold red")
+
         text_parts = content_to_str(self.to_llm_content)
         if text_parts:
             full_content = "".join(text_parts)
-            content.append(full_content)
+            text.append(full_content)
         else:
-            content.append("[no text content]")
-        return content
+            text.append("[no text content]")
+        return text
@@ -28,20 +28,24 @@
 class BrowserObservation(Observation):
     """Base observation for browser operations."""
 
-    output: str = Field(description="The output message from the browser operation")
-    error: str | None = Field(default=None, description="Error message if any")
     screenshot_data: str | None = Field(
         default=None, description="Base64 screenshot data if available"
     )
 
     @property
     def to_llm_content(self) -> Sequence[TextContent | ImageContent]:
-        if self.error:
-            return [TextContent(text=f"Error: {self.error}")]
+        llm_content: list[TextContent | ImageContent] = []
 
-        content: list[TextContent | ImageContent] = [
-            TextContent(text=maybe_truncate(self.output, MAX_BROWSER_OUTPUT_SIZE))
-        ]
+        # If is_error is true, prepend error message
+        if self.is_error:
+            llm_content.append(TextContent(text=self.ERROR_MESSAGE_HEADER))
+
+        # Get text content and truncate if needed
+        content_text = self.text
+        if content_text:
+            llm_content.append(
+                TextContent(text=maybe_truncate(content_text, MAX_BROWSER_OUTPUT_SIZE))
+            )
 
         if self.screenshot_data:
             mime_type = "image/png"
@@ -55,9 +59,9 @@ def to_llm_content(self) -> Sequence[TextContent | ImageContent]:
                 mime_type = "image/webp"
             # Convert base64 to data URL format for ImageContent
             data_url = f"data:{mime_type};base64,{self.screenshot_data}"
-            content.append(ImageContent(image_urls=[data_url]))
+            llm_content.append(ImageContent(image_urls=[data_url]))
 
-        return content
+        return llm_content
 
 
 # ============================================