diff --git a/openhands-sdk/openhands/sdk/agent/agent.py b/openhands-sdk/openhands/sdk/agent/agent.py
index 12a103ea6c..6a066cc46f 100644
--- a/openhands-sdk/openhands/sdk/agent/agent.py
+++ b/openhands-sdk/openhands/sdk/agent/agent.py
@@ -28,7 +28,10 @@
     TextContent,
     ThinkingBlock,
 )
-from openhands.sdk.llm.exceptions import FunctionCallValidationError
+from openhands.sdk.llm.exceptions import (
+    FunctionCallValidationError,
+    LLMContextWindowExceedError,
+)
 from openhands.sdk.logger import get_logger
 from openhands.sdk.security.confirmation_policy import NeverConfirm
 from openhands.sdk.security.llm_analyzer import LLMSecurityAnalyzer
@@ -168,22 +171,19 @@ def step(
             )
             on_event(error_message)
             return
-        except Exception as e:
-            # If there is a condenser registered and the exception is a context window
-            # exceeded, we can recover by triggering a condensation request.
+        except LLMContextWindowExceedError:
+            # If condenser is available and handles requests, trigger condensation
             if (
                 self.condenser is not None
                 and self.condenser.handles_condensation_requests()
-                and self.llm.is_context_window_exceeded_exception(e)
             ):
                 logger.warning(
                     "LLM raised context window exceeded error, triggering condensation"
                 )
                 on_event(CondensationRequest())
                 return
-            # If the error isn't recoverable, keep propagating it up the stack.
-            else:
-                raise e
+            # No condenser available; re-raise for client handling
+            raise

         # LLMResponse already contains the converted message and metrics snapshot
         message: Message = llm_response.message
diff --git a/openhands-sdk/openhands/sdk/llm/exceptions.py b/openhands-sdk/openhands/sdk/llm/exceptions.py
deleted file mode 100644
index dc6c37ef4a..0000000000
--- a/openhands-sdk/openhands/sdk/llm/exceptions.py
+++ /dev/null
@@ -1,110 +0,0 @@
-class LLMError(Exception):
-    """Base class for all LLM-related exceptions."""
-
-    message: str
-
-    def __init__(self, message: str) -> None:
-        super().__init__(message)
-        self.message = message
-
-    def __str__(self) -> str:
-        return self.message
-
-
-class LLMMalformedActionError(LLMError):
-    """Exception raised when the LLM response is malformed or does not conform to the expected format."""  # noqa: E501
-
-    def __init__(self, message: str = "Malformed response") -> None:
-        super().__init__(message)
-
-
-class LLMNoActionError(LLMError):
-    """Exception raised when the LLM response does not include an action."""
-
-    def __init__(self, message: str = "Agent must return an action") -> None:
-        super().__init__(message)
-
-
-class LLMResponseError(LLMError):
-    """Exception raised when the LLM response does not include an action or the action is not of the expected type."""  # noqa: E501
-
-    def __init__(
-        self, message: str = "Failed to retrieve action from LLM response"
-    ) -> None:
-        super().__init__(message)
-
-
-class LLMNoResponseError(LLMError):
-    """Exception raised when the LLM does not return a response, typically seen in
-    Gemini models.
-
-    This exception should be retried
-    Typically, after retry with a non-zero temperature, the LLM will return a response
-    """
-
-    def __init__(
-        self,
-        message: str = "LLM did not return a response. This is only seen in Gemini models so far.",  # noqa: E501
-    ) -> None:
-        super().__init__(message)
-
-
-class LLMContextWindowExceedError(LLMError):
-    def __init__(
-        self,
-        message: str = "Conversation history longer than LLM context window limit. Consider turning on enable_history_truncation config to avoid this error",  # noqa: E501
-    ) -> None:
-        super().__init__(message)
-
-
-# ============================================
-# LLM function calling Exceptions
-# ============================================
-
-
-class FunctionCallConversionError(LLMError):
-    """Exception raised when FunctionCallingConverter failed to convert a non-function
-    call message to a function call message.
-
-    This typically happens when there's a malformed message (e.g., missing
-    tags). But not due to LLM output.
-    """
-
-    def __init__(self, message: str) -> None:
-        super().__init__(message)
-
-
-class FunctionCallValidationError(LLMError):
-    """Exception raised when FunctionCallingConverter failed to validate a function
-    call message.
-
-    This typically happens when the LLM outputs unrecognized function call /
-    parameter names / values.
-    """
-
-    def __init__(self, message: str) -> None:
-        super().__init__(message)
-
-
-class FunctionCallNotExistsError(LLMError):
-    """Exception raised when an LLM call a tool that is not registered."""
-
-    def __init__(self, message: str) -> None:
-        super().__init__(message)
-
-
-# ============================================
-# Other Exceptions
-# ============================================
-
-
-class UserCancelledError(Exception):
-    def __init__(self, message: str = "User cancelled the request") -> None:
-        super().__init__(message)
-
-
-class OperationCancelled(Exception):
-    """Exception raised when an operation is cancelled (e.g. by a keyboard interrupt)."""  # noqa: E501
-
-    def __init__(self, message: str = "Operation was cancelled") -> None:
-        super().__init__(message)
diff --git a/openhands-sdk/openhands/sdk/llm/exceptions/__init__.py b/openhands-sdk/openhands/sdk/llm/exceptions/__init__.py
new file mode 100644
index 0000000000..f933c02015
--- /dev/null
+++ b/openhands-sdk/openhands/sdk/llm/exceptions/__init__.py
@@ -0,0 +1,45 @@
+from .classifier import is_context_window_exceeded, looks_like_auth_error
+from .mapping import map_provider_exception
+from .types import (
+    FunctionCallConversionError,
+    FunctionCallNotExistsError,
+    FunctionCallValidationError,
+    LLMAuthenticationError,
+    LLMBadRequestError,
+    LLMContextWindowExceedError,
+    LLMError,
+    LLMMalformedActionError,
+    LLMNoActionError,
+    LLMNoResponseError,
+    LLMRateLimitError,
+    LLMResponseError,
+    LLMServiceUnavailableError,
+    LLMTimeoutError,
+    OperationCancelled,
+    UserCancelledError,
+)
+
+
+__all__ = [
+    # Types
+    "LLMError",
+    "LLMMalformedActionError",
+    "LLMNoActionError",
+    "LLMResponseError",
+    "FunctionCallConversionError",
+    "FunctionCallValidationError",
+    "FunctionCallNotExistsError",
+    "LLMNoResponseError",
+    "LLMContextWindowExceedError",
+    "LLMAuthenticationError",
+    "LLMRateLimitError",
+    "LLMTimeoutError",
+    "LLMServiceUnavailableError",
+    "LLMBadRequestError",
+    "UserCancelledError",
+    "OperationCancelled",
+    # Helpers
+    "is_context_window_exceeded",
+    "looks_like_auth_error",
+    "map_provider_exception",
+]
diff --git a/openhands-sdk/openhands/sdk/llm/exceptions/classifier.py b/openhands-sdk/openhands/sdk/llm/exceptions/classifier.py
new file mode 100644
index 0000000000..7f49bd39b3
--- /dev/null
+++ b/openhands-sdk/openhands/sdk/llm/exceptions/classifier.py
@@ -0,0 +1,49 @@
+from __future__ import annotations
+
+from litellm.exceptions import BadRequestError, ContextWindowExceededError, OpenAIError
+
+from .types import LLMContextWindowExceedError
+
+
+# Minimal, provider-agnostic context-window detection
+LONG_PROMPT_PATTERNS: list[str] = [
+    "contextwindowexceedederror",
+    "prompt is too long",
+    "input length and `max_tokens` exceed context limit",
+    "please reduce the length of",
+    "the request exceeds the available context size",
+    "context length exceeded",
+]
+
+
+def is_context_window_exceeded(exception: Exception) -> bool:
+    if isinstance(exception, (ContextWindowExceededError, LLMContextWindowExceedError)):
+        return True
+
+    if not isinstance(exception, (BadRequestError, OpenAIError)):
+        return False
+
+    s = str(exception).lower()
+    return any(p in s for p in LONG_PROMPT_PATTERNS)
+
+
+AUTH_PATTERNS: list[str] = [
+    "invalid api key",
+    "unauthorized",
+    "missing api key",
+    "invalid authentication",
+    "access denied",
+]
+
+
+def looks_like_auth_error(exception: Exception) -> bool:
+    if not isinstance(exception, (BadRequestError, OpenAIError)):
+        return False
+    s = str(exception).lower()
+    if any(p in s for p in AUTH_PATTERNS):
+        return True
+    # Some providers include explicit status codes in message text
+    for code in ("status 401", "status 403"):
+        if code in s:
+            return True
+    return False
diff --git a/openhands-sdk/openhands/sdk/llm/exceptions/mapping.py b/openhands-sdk/openhands/sdk/llm/exceptions/mapping.py
new file mode 100644
index 0000000000..8510eaaa57
--- /dev/null
+++ b/openhands-sdk/openhands/sdk/llm/exceptions/mapping.py
@@ -0,0 +1,54 @@
+from __future__ import annotations
+
+from litellm.exceptions import (
+    APIConnectionError,
+    BadRequestError,
+    InternalServerError,
+    RateLimitError,
+    ServiceUnavailableError,
+    Timeout as LiteLLMTimeout,
+)
+
+from .classifier import is_context_window_exceeded, looks_like_auth_error
+from .types import (
+    LLMAuthenticationError,
+    LLMBadRequestError,
+    LLMContextWindowExceedError,
+    LLMRateLimitError,
+    LLMServiceUnavailableError,
+    LLMTimeoutError,
+)
+
+
+def map_provider_exception(exception: Exception) -> Exception:
+    """
+    Map provider/LiteLLM exceptions to SDK-typed exceptions.
+
+    Returns original exception if no mapping applies.
+    """
+    # Context window exceeded first (highest priority)
+    if is_context_window_exceeded(exception):
+        return LLMContextWindowExceedError(str(exception))
+
+    # Auth-like errors often appear as BadRequest/OpenAIError with specific text
+    if looks_like_auth_error(exception):
+        return LLMAuthenticationError(str(exception))
+
+    if isinstance(exception, RateLimitError):
+        return LLMRateLimitError(str(exception))
+
+    if isinstance(exception, LiteLLMTimeout):
+        return LLMTimeoutError(str(exception))
+
+    # Connectivity and service-side availability issues → service unavailable
+    if isinstance(
+        exception, (APIConnectionError, ServiceUnavailableError, InternalServerError)
+    ):
+        return LLMServiceUnavailableError(str(exception))
+
+    # Generic client-side 4xx errors
+    if isinstance(exception, BadRequestError):
+        return LLMBadRequestError(str(exception))
+
+    # Unknown: let caller re-raise original
+    return exception
diff --git a/openhands-sdk/openhands/sdk/llm/exceptions/types.py b/openhands-sdk/openhands/sdk/llm/exceptions/types.py
new file mode 100644
index 0000000000..e6d0522da9
--- /dev/null
+++ b/openhands-sdk/openhands/sdk/llm/exceptions/types.py
@@ -0,0 +1,101 @@
+class LLMError(Exception):
+    message: str
+
+    def __init__(self, message: str) -> None:
+        super().__init__(message)
+        self.message = message
+
+    def __str__(self) -> str:
+        return self.message
+
+
+# General response parsing/validation errors
+class LLMMalformedActionError(LLMError):
+    def __init__(self, message: str = "Malformed response") -> None:
+        super().__init__(message)
+
+
+class LLMNoActionError(LLMError):
+    def __init__(self, message: str = "Agent must return an action") -> None:
+        super().__init__(message)
+
+
+class LLMResponseError(LLMError):
+    def __init__(
+        self, message: str = "Failed to retrieve action from LLM response"
+    ) -> None:
+        super().__init__(message)
+
+
+# Function-calling conversion/validation
+class FunctionCallConversionError(LLMError):
+    def __init__(self, message: str) -> None:
+        super().__init__(message)
+
+
+class FunctionCallValidationError(LLMError):
+    def __init__(self, message: str) -> None:
+        super().__init__(message)
+
+
+class FunctionCallNotExistsError(LLMError):
+    def __init__(self, message: str) -> None:
+        super().__init__(message)
+
+
+# Provider/transport related
+class LLMNoResponseError(LLMError):
+    def __init__(
+        self,
+        message: str = (
+            "LLM did not return a response. This is only seen in Gemini models so far."
+        ),
+    ) -> None:
+        super().__init__(message)
+
+
+class LLMContextWindowExceedError(LLMError):
+    def __init__(
+        self,
+        message: str = (
+            "Conversation history longer than LLM context window limit. "
+            "Consider enabling a condenser or shortening inputs."
+        ),
+    ) -> None:
+        super().__init__(message)
+
+
+class LLMAuthenticationError(LLMError):
+    def __init__(self, message: str = "Invalid or missing API credentials") -> None:
+        super().__init__(message)
+
+
+class LLMRateLimitError(LLMError):
+    def __init__(self, message: str = "Rate limit exceeded") -> None:
+        super().__init__(message)
+
+
+class LLMTimeoutError(LLMError):
+    def __init__(self, message: str = "LLM request timed out") -> None:
+        super().__init__(message)
+
+
+class LLMServiceUnavailableError(LLMError):
+    def __init__(self, message: str = "LLM service unavailable") -> None:
+        super().__init__(message)
+
+
+class LLMBadRequestError(LLMError):
+    def __init__(self, message: str = "Bad request to LLM provider") -> None:
+        super().__init__(message)
+
+
+# Other
+class UserCancelledError(Exception):
+    def __init__(self, message: str = "User cancelled the request") -> None:
+        super().__init__(message)
+
+
+class OperationCancelled(Exception):
+    def __init__(self, message: str = "Operation was cancelled") -> None:
+        super().__init__(message)
diff --git a/openhands-sdk/openhands/sdk/llm/llm.py b/openhands-sdk/openhands/sdk/llm/llm.py
index 1972a2005f..b61ba0e07c 100644
--- a/openhands-sdk/openhands/sdk/llm/llm.py
+++ b/openhands-sdk/openhands/sdk/llm/llm.py
@@ -44,10 +44,7 @@
 )
 from litellm.exceptions import (
     APIConnectionError,
-    BadRequestError,
-    ContextWindowExceededError,
     InternalServerError,
-    OpenAIError,
     RateLimitError,
     ServiceUnavailableError,
     Timeout as LiteLLMTimeout,
@@ -62,7 +59,10 @@
     token_counter,
 )

-from openhands.sdk.llm.exceptions import LLMNoResponseError
+from openhands.sdk.llm.exceptions import (
+    LLMNoResponseError,
+    map_provider_exception,
+)

 # OpenHands utilities
 from openhands.sdk.llm.llm_response import LLMResponse
@@ -513,6 +513,9 @@ def _one_attempt(**retry_kwargs) -> ModelResponse:
             )
         except Exception as e:
             self._telemetry.on_error(e)
+            mapped = map_provider_exception(e)
+            if mapped is not e:
+                raise mapped from e
             raise

    # =========================================================================
@@ -632,6 +635,9 @@ def _one_attempt(**retry_kwargs) -> ResponsesAPIResponse:
             )
         except Exception as e:
             self._telemetry.on_error(e)
+            mapped = map_provider_exception(e)
+            if mapped is not e:
+                raise mapped from e
             raise

    # =========================================================================
@@ -1015,51 +1021,3 @@ def resolve_diff_from_deserialized(self, persisted: LLM) -> LLM:
                 f"Diff: {pretty_pydantic_diff(self, reconciled)}"
             )
         return reconciled
-
-    @staticmethod
-    def is_context_window_exceeded_exception(exception: Exception) -> bool:
-        """Check if the exception indicates a context window exceeded error.
-
-        Context window exceeded errors vary by provider, and LiteLLM does not do a
-        consistent job of identifying and wrapping them.
-        """
-        # A context window exceeded error from litellm is the best signal we have.
-        if isinstance(exception, ContextWindowExceededError):
-            return True
-
-        # But with certain providers the exception might be a bad request or generic
-        # OpenAI error, and we have to use the content of the error to figure out what
-        # is wrong.
-        if not isinstance(exception, (BadRequestError, OpenAIError)):
-            return False
-
-        # Not all BadRequestError or OpenAIError are context window exceeded errors, so
-        # we need to check the message content for known patterns.
-        error_string = str(exception).lower()
-
-        known_exception_patterns: list[str] = [
-            "contextwindowexceedederror",
-            "prompt is too long",
-            "input length and `max_tokens` exceed context limit",
-            "please reduce the length of",
-            "the request exceeds the available context size",
-            "context length exceeded",
-        ]
-
-        if any(pattern in error_string for pattern in known_exception_patterns):
-            return True
-
-        # A special case for SambaNova, where multiple patterns are needed
-        # simultaneously.
-        samba_nova_patterns: list[str] = [
-            "sambanovaexception",
-            "maximum context length",
-        ]
-
-        if all(pattern in error_string for pattern in samba_nova_patterns):
-            return True
-
-        # If we've made it this far and haven't managed to positively ID it as a context
-        # window exceeded error, we'll have to assume it's not and rely on the call-site
-        # context to handle it appropriately.
-        return False
diff --git a/tests/sdk/agent/test_agent_context_window_condensation.py b/tests/sdk/agent/test_agent_context_window_condensation.py
new file mode 100644
index 0000000000..8711998b71
--- /dev/null
+++ b/tests/sdk/agent/test_agent_context_window_condensation.py
@@ -0,0 +1,64 @@
+import pytest
+from pydantic import PrivateAttr
+
+from openhands.sdk.agent import Agent
+from openhands.sdk.context.condenser.base import CondenserBase
+from openhands.sdk.context.view import View
+from openhands.sdk.conversation import Conversation
+from openhands.sdk.event.condenser import CondensationRequest
+from openhands.sdk.llm import LLM
+from openhands.sdk.llm.exceptions import LLMContextWindowExceedError
+
+
+class RaisingLLM(LLM):
+    _force_responses: bool = PrivateAttr(default=False)
+
+    def __init__(self, *, model: str = "test-model", force_responses: bool = False):
+        super().__init__(model=model, usage_id="test-llm")
+        self._force_responses = force_responses
+
+    def uses_responses_api(self) -> bool:  # override gating
+        return self._force_responses
+
+    def completion(self, *, messages, tools=None, **kwargs):  # type: ignore[override]
+        raise LLMContextWindowExceedError()
+
+    def responses(self, *, messages, tools=None, **kwargs):  # type: ignore[override]
+        raise LLMContextWindowExceedError()
+
+
+class HandlesRequestsCondenser(CondenserBase):
+    def condense(self, view: View):  # pragma: no cover - trivial passthrough
+        return view
+
+    def handles_condensation_requests(self) -> bool:
+        return True
+
+
+@pytest.mark.parametrize("force_responses", [True, False])
+def test_agent_triggers_condensation_request_when_ctx_exceeded_with_condenser(
+    force_responses: bool,
+):
+    llm = RaisingLLM(force_responses=force_responses)
+    agent = Agent(llm=llm, tools=[], condenser=HandlesRequestsCondenser())
+    convo = Conversation(agent=agent)
+
+    seen = []
+
+    def on_event(e):
+        seen.append(e)
+
+    # Expect Agent to emit CondensationRequest and not raise
+    agent.step(convo, on_event=on_event)
+
+    assert any(isinstance(e, CondensationRequest) for e in seen)
+
+
+@pytest.mark.parametrize("force_responses", [True, False])
+def test_agent_raises_ctx_exceeded_when_no_condenser(force_responses: bool):
+    llm = RaisingLLM(force_responses=force_responses)
+    agent = Agent(llm=llm, tools=[], condenser=None)
+    convo = Conversation(agent=agent)
+
+    with pytest.raises(LLMContextWindowExceedError):
+        agent.step(convo, on_event=lambda e: None)
diff --git a/tests/sdk/llm/test_api_connection_error_retry.py b/tests/sdk/llm/test_api_connection_error_retry.py
index 7209262088..315490d18f 100644
--- a/tests/sdk/llm/test_api_connection_error_retry.py
+++ b/tests/sdk/llm/test_api_connection_error_retry.py
@@ -6,6 +6,7 @@
 from pydantic import SecretStr

 from openhands.sdk.llm import LLM, LLMResponse, Message, TextContent
+from openhands.sdk.llm.exceptions import LLMServiceUnavailableError


 def create_mock_response(content: str = "Test response", response_id: str = "test-id"):
@@ -80,7 +81,7 @@ def test_completion_retries_api_connection_error(
 def test_completion_max_retries_api_connection_error(
     mock_litellm_completion, default_config
 ):
-    """Test that APIConnectionError respects max retries."""
+    """Test that APIConnectionError respects max retries and is mapped to SDK error."""
     # Mock the litellm_completion to raise APIConnectionError multiple times
     mock_litellm_completion.side_effect = [
         APIConnectionError(
@@ -110,8 +111,9 @@
         usage_id="test-service",
     )

-    # The completion should raise an APIConnectionError after exhausting all retries
-    with pytest.raises(APIConnectionError) as excinfo:
+    # The completion should raise an SDK typed error after exhausting all retries
+
+    with pytest.raises(LLMServiceUnavailableError) as excinfo:
         llm.completion(
             messages=[Message(role="user", content=[TextContent(text="Hello!")])],
         )
@@ -123,6 +125,9 @@
     # The exception should contain connection error information
     assert "API connection error" in str(excinfo.value)

+    # Ensure the original provider exception is preserved as the cause
+    assert isinstance(excinfo.value.__cause__, APIConnectionError)
+

 @patch("openhands.sdk.llm.llm.litellm_completion")
 def test_completion_no_retry_on_success(mock_litellm_completion, default_config):
diff --git a/tests/sdk/llm/test_exception.py b/tests/sdk/llm/test_exception.py
index 8ac1c63914..c15620c49d 100644
--- a/tests/sdk/llm/test_exception.py
+++ b/tests/sdk/llm/test_exception.py
@@ -61,9 +61,7 @@ def test_llm_context_window_exceed_error_default():
     error = LLMContextWindowExceedError()

     expected_message = "Conversation history longer than LLM context window limit. "
-    expected_message += (
-        "Consider turning on enable_history_truncation config to avoid this error"
-    )
+    expected_message += "Consider enabling a condenser or shortening inputs."

     assert str(error) == expected_message
     assert error.message == expected_message
diff --git a/tests/sdk/llm/test_exception_classifier.py b/tests/sdk/llm/test_exception_classifier.py
new file mode 100644
index 0000000000..08c76dbbac
--- /dev/null
+++ b/tests/sdk/llm/test_exception_classifier.py
@@ -0,0 +1,46 @@
+from litellm.exceptions import BadRequestError, ContextWindowExceededError
+
+from openhands.sdk.llm.exceptions import (
+    is_context_window_exceeded,
+    looks_like_auth_error,
+)
+
+
+MODEL = "test-model"
+PROVIDER = "test-provider"
+
+
+def test_is_context_window_exceeded_direct_type():
+    assert (
+        is_context_window_exceeded(ContextWindowExceededError("boom", MODEL, PROVIDER))
+        is True
+    )
+
+
+def test_is_context_window_exceeded_via_text():
+    # BadRequest containing context-window-ish text should be detected
+    e = BadRequestError(
+        "The request exceeds the available context size", MODEL, PROVIDER
+    )
+    assert is_context_window_exceeded(e) is True
+
+
+def test_is_context_window_exceeded_negative():
+    assert (
+        is_context_window_exceeded(BadRequestError("irrelevant", MODEL, PROVIDER))
+        is False
+    )
+
+
+def test_looks_like_auth_error_positive():
+    assert (
+        looks_like_auth_error(BadRequestError("Invalid API key", MODEL, PROVIDER))
+        is True
+    )
+
+
+def test_looks_like_auth_error_negative():
+    assert (
+        looks_like_auth_error(BadRequestError("Something else", MODEL, PROVIDER))
+        is False
+    )
diff --git a/tests/sdk/llm/test_exception_mapping.py b/tests/sdk/llm/test_exception_mapping.py
new file mode 100644
index 0000000000..af23e439fa
--- /dev/null
+++ b/tests/sdk/llm/test_exception_mapping.py
@@ -0,0 +1,41 @@
+from litellm.exceptions import BadRequestError
+
+from openhands.sdk.llm.exceptions import (
+    LLMAuthenticationError,
+    LLMBadRequestError,
+    map_provider_exception,
+)
+
+
+MODEL = "test-model"
+PROVIDER = "test-provider"
+
+
+def test_map_auth_error_from_bad_request():
+    e = BadRequestError("Invalid API key provided", MODEL, PROVIDER)
+    mapped = map_provider_exception(e)
+    assert isinstance(mapped, LLMAuthenticationError)
+
+
+def test_map_auth_error_from_openai_error():
+    # OpenAIError has odd behavior; create a BadRequestError that wraps an
+    # auth-like message instead, as providers commonly route auth issues
+    # through BadRequestError in LiteLLM
+    e = BadRequestError("status 401 Unauthorized: missing API key", MODEL, PROVIDER)
+    mapped = map_provider_exception(e)
+    assert isinstance(mapped, LLMAuthenticationError)
+
+
+def test_map_generic_bad_request():
+    e = BadRequestError("Some client-side error not related to auth", MODEL, PROVIDER)
+    mapped = map_provider_exception(e)
+    assert isinstance(mapped, LLMBadRequestError)
+
+
+def test_passthrough_unknown_exception():
+    class MyCustom(Exception):
+        pass
+
+    e = MyCustom("random")
+    mapped = map_provider_exception(e)
+    assert mapped is e
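
A minimal usage sketch (illustrative, not part of the patch) of how a caller
might branch on the typed exceptions this diff introduces. The import path and
the completion(messages=...) signature are taken from the diff; "llm" and
"messages" are hypothetical placeholders.

    from openhands.sdk.llm.exceptions import (
        LLMAuthenticationError,
        LLMContextWindowExceedError,
        LLMRateLimitError,
        LLMServiceUnavailableError,
    )


    def run_once(llm, messages):
        # llm and messages are placeholders for an LLM instance and a
        # list of Message objects as used in the tests above
        try:
            return llm.completion(messages=messages)
        except LLMContextWindowExceedError:
            # Recoverable: shorten history or let a condenser handle it
            raise
        except LLMAuthenticationError:
            # Not retryable until credentials are fixed
            raise
        except (LLMRateLimitError, LLMServiceUnavailableError):
            # Potentially transient: a caller may back off and retry
            raise

Because the mapping raises with "from e", the original provider exception
remains available on __cause__ for callers that need provider-specific detail,
as asserted in test_completion_max_retries_api_connection_error above.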