From 7090060b68324262c2f9fdb20dfacfd2b283fa15 Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Mon, 11 Aug 2025 14:46:22 -0400 Subject: [PATCH 01/73] select changes from wip-v0.4/core --- libs/core/langchain_core/messages/__init__.py | 66 + libs/core/langchain_core/messages/ai.py | 47 +- .../langchain_core/messages/content_blocks.py | 1453 ++++++++++++++++- libs/core/langchain_core/messages/modifier.py | 2 +- libs/core/langchain_core/messages/tool.py | 92 +- libs/core/langchain_core/messages/utils.py | 2 +- .../output_parsers/transform.py | 4 +- libs/core/langchain_core/runnables/base.py | 16 +- libs/core/langchain_core/runnables/config.py | 4 +- libs/core/langchain_core/runnables/graph.py | 6 +- .../langchain_core/utils/function_calling.py | 2 +- libs/core/pyproject.toml | 2 + .../tests/unit_tests/messages/test_imports.py | 22 + .../tests/unit_tests/messages/test_utils.py | 31 +- .../prompts/__snapshots__/test_chat.ambr | 82 +- .../runnables/__snapshots__/test_graph.ambr | 41 +- libs/core/tests/unit_tests/test_messages.py | 325 +++- 17 files changed, 1939 insertions(+), 258 deletions(-) diff --git a/libs/core/langchain_core/messages/__init__.py b/libs/core/langchain_core/messages/__init__.py index fe87e964af291..410299ea5b553 100644 --- a/libs/core/langchain_core/messages/__init__.py +++ b/libs/core/langchain_core/messages/__init__.py @@ -33,9 +33,31 @@ ) from langchain_core.messages.chat import ChatMessage, ChatMessageChunk from langchain_core.messages.content_blocks import ( + Annotation, + AudioContentBlock, + Citation, + CodeInterpreterCall, + CodeInterpreterOutput, + CodeInterpreterResult, + ContentBlock, + DataContentBlock, + FileContentBlock, + ImageContentBlock, + NonStandardAnnotation, + NonStandardContentBlock, + PlainTextContentBlock, + ReasoningContentBlock, + TextContentBlock, + VideoContentBlock, + WebSearchCall, + WebSearchResult, convert_to_openai_data_block, convert_to_openai_image_block, is_data_content_block, + is_reasoning_block, + is_text_block, + is_tool_call_block, + is_tool_call_chunk, ) from langchain_core.messages.function import FunctionMessage, FunctionMessageChunk from langchain_core.messages.human import HumanMessage, HumanMessageChunk @@ -65,24 +87,42 @@ __all__ = ( "AIMessage", "AIMessageChunk", + "Annotation", "AnyMessage", + "AudioContentBlock", "BaseMessage", "BaseMessageChunk", "ChatMessage", "ChatMessageChunk", + "Citation", + "CodeInterpreterCall", + "CodeInterpreterOutput", + "CodeInterpreterResult", + "ContentBlock", + "DataContentBlock", + "FileContentBlock", "FunctionMessage", "FunctionMessageChunk", "HumanMessage", "HumanMessageChunk", + "ImageContentBlock", "InvalidToolCall", "MessageLikeRepresentation", + "NonStandardAnnotation", + "NonStandardContentBlock", + "PlainTextContentBlock", + "ReasoningContentBlock", "RemoveMessage", "SystemMessage", "SystemMessageChunk", + "TextContentBlock", "ToolCall", "ToolCallChunk", "ToolMessage", "ToolMessageChunk", + "VideoContentBlock", + "WebSearchCall", + "WebSearchResult", "_message_from_dict", "convert_to_messages", "convert_to_openai_data_block", @@ -91,6 +131,10 @@ "filter_messages", "get_buffer_string", "is_data_content_block", + "is_reasoning_block", + "is_text_block", + "is_tool_call_block", + "is_tool_call_chunk", "merge_content", "merge_message_runs", "message_chunk_to_message", @@ -103,25 +147,43 @@ _dynamic_imports = { "AIMessage": "ai", "AIMessageChunk": "ai", + "Annotation": "content_blocks", + "AudioContentBlock": "content_blocks", "BaseMessage": "base", "BaseMessageChunk": "base", 
"merge_content": "base", "message_to_dict": "base", "messages_to_dict": "base", + "Citation": "content_blocks", + "ContentBlock": "content_blocks", "ChatMessage": "chat", "ChatMessageChunk": "chat", + "CodeInterpreterCall": "content_blocks", + "CodeInterpreterOutput": "content_blocks", + "CodeInterpreterResult": "content_blocks", + "DataContentBlock": "content_blocks", + "FileContentBlock": "content_blocks", "FunctionMessage": "function", "FunctionMessageChunk": "function", "HumanMessage": "human", "HumanMessageChunk": "human", + "NonStandardAnnotation": "content_blocks", + "NonStandardContentBlock": "content_blocks", + "PlainTextContentBlock": "content_blocks", + "ReasoningContentBlock": "content_blocks", "RemoveMessage": "modifier", "SystemMessage": "system", "SystemMessageChunk": "system", + "WebSearchCall": "content_blocks", + "WebSearchResult": "content_blocks", + "ImageContentBlock": "content_blocks", "InvalidToolCall": "tool", + "TextContentBlock": "content_blocks", "ToolCall": "tool", "ToolCallChunk": "tool", "ToolMessage": "tool", "ToolMessageChunk": "tool", + "VideoContentBlock": "content_blocks", "AnyMessage": "utils", "MessageLikeRepresentation": "utils", "_message_from_dict": "utils", @@ -132,6 +194,10 @@ "filter_messages": "utils", "get_buffer_string": "utils", "is_data_content_block": "content_blocks", + "is_reasoning_block": "content_blocks", + "is_text_block": "content_blocks", + "is_tool_call_block": "content_blocks", + "is_tool_call_chunk": "content_blocks", "merge_message_runs": "utils", "message_chunk_to_message": "utils", "messages_from_dict": "utils", diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index c81187dc3f69b..45eb7d2ba5e8d 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -8,11 +8,7 @@ from pydantic import model_validator from typing_extensions import NotRequired, Self, TypedDict, override -from langchain_core.messages.base import ( - BaseMessage, - BaseMessageChunk, - merge_content, -) +from langchain_core.messages.base import BaseMessage, BaseMessageChunk, merge_content from langchain_core.messages.tool import ( InvalidToolCall, ToolCall, @@ -20,23 +16,26 @@ default_tool_chunk_parser, default_tool_parser, ) -from langchain_core.messages.tool import ( - invalid_tool_call as create_invalid_tool_call, -) -from langchain_core.messages.tool import ( - tool_call as create_tool_call, -) -from langchain_core.messages.tool import ( - tool_call_chunk as create_tool_call_chunk, -) +from langchain_core.messages.tool import invalid_tool_call as create_invalid_tool_call +from langchain_core.messages.tool import tool_call as create_tool_call +from langchain_core.messages.tool import tool_call_chunk as create_tool_call_chunk from langchain_core.utils._merge import merge_dicts, merge_lists from langchain_core.utils.json import parse_partial_json from langchain_core.utils.usage import _dict_int_op logger = logging.getLogger(__name__) +_LC_AUTO_PREFIX = "lc_" +"""LangChain auto-generated ID prefix for messages and content blocks.""" -_LC_ID_PREFIX = "run-" +_LC_ID_PREFIX = f"{_LC_AUTO_PREFIX}run-" +"""Internal tracing/callback system identifier. + +Used for: +- Tracing. Every LangChain operation (LLM call, chain execution, tool use, etc.) 
+ gets a unique run_id (UUID) +- Enables tracking parent-child relationships between operations +""" class InputTokenDetails(TypedDict, total=False): @@ -428,17 +427,27 @@ def add_ai_message_chunks( chunk_id = None candidates = [left.id] + [o.id for o in others] - # first pass: pick the first non-run-* id + # first pass: pick the first provider-assigned id (non-run-* and non-lc_*) for id_ in candidates: - if id_ and not id_.startswith(_LC_ID_PREFIX): + if ( + id_ + and not id_.startswith(_LC_ID_PREFIX) + and not id_.startswith(_LC_AUTO_PREFIX) + ): chunk_id = id_ break else: - # second pass: no provider-assigned id found, just take the first non-null + # second pass: prefer lc_run-* ids over lc_* ids for id_ in candidates: - if id_: + if id_ and id_.startswith(_LC_ID_PREFIX): chunk_id = id_ break + else: + # third pass: take any remaining id (auto-generated lc_* ids) + for id_ in candidates: + if id_: + chunk_id = id_ + break return left.__class__( example=left.example, diff --git a/libs/core/langchain_core/messages/content_blocks.py b/libs/core/langchain_core/messages/content_blocks.py index 83a66fb123a42..d426339156d39 100644 --- a/libs/core/langchain_core/messages/content_blocks.py +++ b/libs/core/langchain_core/messages/content_blocks.py @@ -1,110 +1,967 @@ -"""Types for content blocks.""" +"""Standard, multimodal content blocks for Large Language Model I/O. + +.. warning:: + This module is under active development. The API is unstable and subject to + change in future releases. + +This module provides a standardized data structure for representing inputs to and +outputs from LLMs. The core abstraction is the **Content Block**, a ``TypedDict`` that +can represent a piece of text, an image, a tool call, or other structured data. + +**Rationale** + +Different LLM providers use distinct and incompatible API schemas. This module +introduces a unified, provider-agnostic format to standardize these interactions. A +message to or from a model is simply a ``list`` of ``ContentBlock`` objects, allowing +for the natural interleaving of text, images, and other content in a single, ordered +sequence. + +An adapter for a specific provider is responsible for translating this standard list of +blocks into the format required by its API. + +**Extensibility** + +Data **not yet mapped** to a standard block may be represented using the +``NonStandardContentBlock``, which allows for provider-specific data to be included +without losing the benefits of type checking and validation. + +Furthermore, provider-specific fields **within** a standard block are fully supported +by default in the ``extras`` field of each block. This allows for additional metadata +to be included without breaking the standard structure. + +Following widespread adoption of `PEP 728 `__, we will add +``extra_items=Any`` as a param to Content Blocks. This will signify to type checkers +that additional provider-specific fields are allowed outside of the ``extras`` field, +and that will become the new standard approach to adding provider-specific metadata. + +.. warning:: + Do not heavily rely on the ``extras`` field for provider-specific data! This field + is subject to deprecation in future releases as we move towards PEP 728. + +**Example with PEP 728 provider-specific fields:** + +.. 
code-block:: python + + # Content block definition + # NOTE: `extra_items=Any` + class TextContentBlock(TypedDict, extra_items=Any): + type: Literal["text"] + id: NotRequired[str] + text: str + annotations: NotRequired[list[Annotation]] + index: NotRequired[int] + +.. code-block:: python + + from langchain_core.messages.content_blocks import TextContentBlock + + # Create a text content block with provider-specific fields + my_block: TextContentBlock = { + # Add required fields + "type": "text", + "text": "Hello, world!", + # Additional fields not specified in the TypedDict + # These are valid with PEP 728 and are typed as Any + "openai_metadata": {"model": "gpt-4", "temperature": 0.7}, + "anthropic_usage": {"input_tokens": 10, "output_tokens": 20}, + "custom_field": "any value", + } + + # Mutating an existing block to add provider-specific fields + openai_data = my_block["openai_metadata"] # Type: Any + +.. note:: + PEP 728 is enabled with ``# type: ignore[call-arg]`` comments to suppress warnings + from type checkers that don't yet support it. The functionality works correctly + in Python 3.13+ and will be fully supported as the ecosystem catches up. + +**Key Block Types** + +The module defines several types of content blocks, including: + +- ``TextContentBlock``: Standard text. +- ``ImageContentBlock``, ``Audio...``, ``Video...``, ``PlainText...``, ``File...``: For multimodal data. +- ``ToolCallContentBlock``: For function calling. +- ``ReasoningContentBlock``: To capture a model's thought process. +- ``Citation``: For annotations that link generated text to a source document. + +**Example Usage** + +.. code-block:: python + + # Direct construction: + from langchain_core.messages.content_blocks import TextContentBlock, ImageContentBlock + + multimodal_message: AIMessage = [ + TextContentBlock(type="text", text="What is shown in this image?"), + ImageContentBlock( + type="image", + url="https://www.langchain.com/images/brand/langchain_logo_text_w_white.png", + mime_type="image/png", + ), + ] + + from langchain_core.messages.content_blocks import create_text_block, create_image_block + + # Using factory functions: + multimodal_message: AIMessage = [ + create_text_block("What is shown in this image?"), + create_image_block( + url="https://www.langchain.com/images/brand/langchain_logo_text_w_white.png", + mime_type="image/png", + ), + ] + +Factory functions like ``create_text_block`` and ``create_image_block`` are provided +and offer benefits such as: +- Automatic ID generation (when not provided) +- No need to manually specify the ``type`` field + +""" # noqa: E501 import warnings -from typing import Any, Literal, Union +from typing import Any, Literal, Optional, Union +from uuid import uuid4 + +from typing_extensions import NotRequired, TypedDict, TypeGuard + + +def _ensure_id(id_val: Optional[str]) -> str: + """Ensure the ID is a valid string, generating a new UUID if not provided. + + Auto-generated UUIDs are prefixed by ``'lc_'`` to indicate they are + LangChain-generated IDs. + + Args: + id_val: Optional string ID value to validate. + + Returns: + A valid string ID, either the provided value or a new UUID. + """ + return id_val or str(f"lc_{uuid4()}") + + +class Citation(TypedDict): + """Annotation for citing data from a document. + + .. note:: + ``start``/``end`` indices refer to the **response text**, + not the source text. This means that the indices are relative to the model's + response, not the original document (as specified in the ``url``). + + .. 
note:: + ``create_citation`` may also be used as a factory to create a ``Citation``. + Benefits include: + + * Automatic ID generation (when not provided) + * Required arguments strictly validated at creation time + + """ + + type: Literal["citation"] + """Type of the content block. Used for discrimination.""" + + id: NotRequired[str] + """Content block identifier. Either: + + - Generated by the provider (e.g., OpenAI's file ID) + - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ + + url: NotRequired[str] + """URL of the document source.""" + + # For future consideration, if needed: + # provenance: NotRequired[str] + # """Provenance of the document, e.g., ``'Wikipedia'``, ``'arXiv'``, etc. + + # Included for future compatibility; not currently implemented. + # """ + + title: NotRequired[str] + """Source document title. + + For example, the page title for a web page or the title of a paper. + """ -from pydantic import TypeAdapter, ValidationError -from typing_extensions import NotRequired, TypedDict + start_index: NotRequired[int] + """Start index of the **response text** (``TextContentBlock.text``) for which the + annotation applies.""" + end_index: NotRequired[int] + """End index of the **response text** (``TextContentBlock.text``) for which the + annotation applies.""" + + cited_text: NotRequired[str] + """Excerpt of source text being cited.""" + + # NOTE: not including spans for the raw document text (such as `text_start_index` + # and `text_end_index`) as this is not currently supported by any provider. The + # thinking is that the `cited_text` should be sufficient for most use cases, and it + # is difficult to reliably extract spans from the raw document text across file + # formats or encoding schemes. + + extras: NotRequired[dict[str, Any]] + """Provider-specific metadata.""" + + +class NonStandardAnnotation(TypedDict): + """Provider-specific annotation format.""" + + type: Literal["non_standard_annotation"] + """Type of the content block. Used for discrimination.""" + + id: NotRequired[str] + """Content block identifier. Either: + + - Generated by the provider (e.g., OpenAI's file ID) + - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ + + value: dict[str, Any] + """Provider-specific annotation data.""" + + +Annotation = Union[Citation, NonStandardAnnotation] + + +class TextContentBlock(TypedDict): + """Text output from a LLM. + + This typically represents the main text content of a message, such as the response + from a language model or the text of a user message. + + .. note:: + ``create_text_block`` may also be used as a factory to create a + ``TextContentBlock``. Benefits include: + + * Automatic ID generation (when not provided) + * Required arguments strictly validated at creation time + + """ + + type: Literal["text"] + """Type of the content block. Used for discrimination.""" + + id: NotRequired[str] + """Content block identifier. Either: + + - Generated by the provider (e.g., OpenAI's file ID) + - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ + + text: str + """Block text.""" + + annotations: NotRequired[list[Annotation]] + """``Citation``s and other annotations.""" + + index: NotRequired[int] + """Index of block in aggregate response. Used during streaming.""" + + extras: NotRequired[dict[str, Any]] + """Provider-specific metadata.""" + + +class ToolCall(TypedDict): + """Represents a request to call a tool. + + Example: + + .. 
code-block:: python + + { + "name": "foo", + "args": {"a": 1}, + "id": "123" + } + + This represents a request to call the tool named "foo" with arguments {"a": 1} + and an identifier of "123". + + .. note:: + ``create_tool_call`` may also be used as a factory to create a + ``ToolCall``. Benefits include: + + * Automatic ID generation (when not provided) + * Required arguments strictly validated at creation time + + """ + + type: Literal["tool_call"] + """Used for discrimination.""" + + id: Optional[str] + """An identifier associated with the tool call. + + An identifier is needed to associate a tool call request with a tool + call result in events when multiple concurrent tool calls are made. + """ + # TODO: Consider making this NotRequired[str] in the future. + + name: str + """The name of the tool to be called.""" + + args: dict[str, Any] + """The arguments to the tool call.""" + + index: NotRequired[int] + """Index of block in aggregate response. Used during streaming.""" + + extras: NotRequired[dict[str, Any]] + """Provider-specific metadata.""" + + +class ToolCallChunk(TypedDict): + """A chunk of a tool call (e.g., as part of a stream). + + When merging ``ToolCallChunks`` (e.g., via ``AIMessageChunk.__add__``), + all string attributes are concatenated. Chunks are only merged if their + values of ``index`` are equal and not ``None``. + + Example: + + .. code-block:: python + + left_chunks = [ToolCallChunk(name="foo", args='{"a":', index=0)] + right_chunks = [ToolCallChunk(name=None, args='1}', index=0)] + + ( + AIMessageChunk(content="", tool_call_chunks=left_chunks) + + AIMessageChunk(content="", tool_call_chunks=right_chunks) + ).tool_call_chunks == [ToolCallChunk(name='foo', args='{"a":1}', index=0)] + """ -class BaseDataContentBlock(TypedDict, total=False): - """Base class for data content blocks.""" + # TODO: Consider making fields NotRequired[str] in the future. + + type: NotRequired[Literal["tool_call_chunk"]] + """Used for serialization.""" + + id: Optional[str] + """An identifier associated with the tool call.""" + + name: Optional[str] + """The name of the tool to be called.""" + + args: Optional[str] + """The arguments to the tool call.""" + + index: Optional[int] + """The index of the tool call in a sequence.""" + + extras: NotRequired[dict[str, Any]] + """Provider-specific metadata.""" + + +class InvalidToolCall(TypedDict): + """Allowance for errors made by LLM. + + Here we add an ``error`` key to surface errors made during generation + (e.g., invalid JSON arguments.) + """ + + # TODO: Consider making fields NotRequired[str] in the future. + + type: Literal["invalid_tool_call"] + """Used for discrimination.""" + + id: Optional[str] + """An identifier associated with the tool call.""" + + name: Optional[str] + """The name of the tool to be called.""" + + args: Optional[str] + """The arguments to the tool call.""" + + error: Optional[str] + """An error message associated with the tool call.""" + + index: NotRequired[int] + """Index of block in aggregate response. Used during streaming.""" + + extras: NotRequired[dict[str, Any]] + """Provider-specific metadata.""" + + +# Note: These are not standard tool calls, but rather provider-specific built-in tools. +# Web search +class WebSearchCall(TypedDict): + """Built-in web search tool call.""" + + type: Literal["web_search_call"] + """Type of the content block. Used for discrimination.""" + + id: NotRequired[str] + """Content block identifier. 
Either: + + - Generated by the provider (e.g., OpenAI's file ID) + - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ + + query: NotRequired[str] + """The search query used in the web search tool call.""" + + index: NotRequired[int] + """Index of block in aggregate response. Used during streaming.""" + + extras: NotRequired[dict[str, Any]] + """Provider-specific metadata.""" + + +class WebSearchResult(TypedDict): + """Result of a built-in web search tool call.""" + + type: Literal["web_search_result"] + """Type of the content block. Used for discrimination.""" + + id: NotRequired[str] + """Content block identifier. Either: + + - Generated by the provider (e.g., OpenAI's file ID) + - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ + + urls: NotRequired[list[str]] + """List of URLs returned by the web search tool call.""" + + index: NotRequired[int] + """Index of block in aggregate response. Used during streaming.""" + + extras: NotRequired[dict[str, Any]] + """Provider-specific metadata.""" + + +class CodeInterpreterCall(TypedDict): + """Built-in code interpreter tool call.""" + + type: Literal["code_interpreter_call"] + """Type of the content block. Used for discrimination.""" + + id: NotRequired[str] + """Content block identifier. Either: + + - Generated by the provider (e.g., OpenAI's file ID) + - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ + + language: NotRequired[str] + """The name of the programming language used in the code interpreter tool call.""" + + code: NotRequired[str] + """The code to be executed by the code interpreter.""" + + index: NotRequired[int] + """Index of block in aggregate response. Used during streaming.""" + + extras: NotRequired[dict[str, Any]] + """Provider-specific metadata.""" + + +class CodeInterpreterOutput(TypedDict): + """Output of a singular code interpreter tool call. + + Full output of a code interpreter tool call is represented by + ``CodeInterpreterResult`` which is a list of these blocks. + """ + + type: Literal["code_interpreter_output"] + """Type of the content block. Used for discrimination.""" + + id: NotRequired[str] + """Content block identifier. Either: + + - Generated by the provider (e.g., OpenAI's file ID) + - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ + + return_code: NotRequired[int] + """Return code of the executed code. + + Example: ``0`` for success, non-zero for failure. + """ + + stderr: NotRequired[str] + """Standard error output of the executed code.""" + + stdout: NotRequired[str] + """Standard output of the executed code.""" + + file_ids: NotRequired[list[str]] + """List of file IDs generated by the code interpreter.""" + + index: NotRequired[int] + """Index of block in aggregate response. Used during streaming.""" + + extras: NotRequired[dict[str, Any]] + """Provider-specific metadata.""" + + +class CodeInterpreterResult(TypedDict): + """Result of a code interpreter tool call.""" + + type: Literal["code_interpreter_result"] + """Type of the content block. Used for discrimination.""" + + id: NotRequired[str] + """Content block identifier. Either: + + - Generated by the provider (e.g., OpenAI's file ID) + - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ + + output: list[CodeInterpreterOutput] + """List of outputs from the code interpreter tool call.""" + + index: NotRequired[int] + """Index of block in aggregate response. 
Used during streaming.""" + + extras: NotRequired[dict[str, Any]] + """Provider-specific metadata.""" + + +class ReasoningContentBlock(TypedDict): + """Reasoning output from a LLM. + + .. note:: + ``create_reasoning_block`` may also be used as a factory to create a + ``ReasoningContentBlock``. Benefits include: + + * Automatic ID generation (when not provided) + * Required arguments strictly validated at creation time + + """ + + type: Literal["reasoning"] + """Type of the content block. Used for discrimination.""" + + id: NotRequired[str] + """Content block identifier. Either: + + - Generated by the provider (e.g., OpenAI's file ID) + - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ + + reasoning: NotRequired[str] + """Reasoning text. + + Either the thought summary or the raw reasoning text itself. This is often parsed + from ```` tags in the model's response. + """ + + index: NotRequired[int] + """Index of block in aggregate response. Used during streaming.""" + + extras: NotRequired[dict[str, Any]] + """Provider-specific metadata.""" + + +# Note: `title` and `context` are fields that could be used to provide additional +# information about the file, such as a description or summary of its content. +# E.g. with Claude, you can provide a context for a file which is passed to the model. +class ImageContentBlock(TypedDict): + """Image data. + + .. note:: + ``create_image_block`` may also be used as a factory to create a + ``ImageContentBlock``. Benefits include: + + * Automatic ID generation (when not provided) + * Required arguments strictly validated at creation time + + """ + + type: Literal["image"] + """Type of the content block. Used for discrimination.""" + + id: NotRequired[str] + """Content block identifier. Either: + + - Generated by the provider (e.g., OpenAI's file ID) + - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ + + file_id: NotRequired[str] + """ID of the image file, e.g., from a file storage system.""" mime_type: NotRequired[str] - """MIME type of the content block (if needed).""" + """MIME type of the image. Required for base64. + + `Examples from IANA `__ + """ + + index: NotRequired[int] + """Index of block in aggregate response. Used during streaming.""" + + url: NotRequired[str] + """URL of the image.""" + + base64: NotRequired[str] + """Data as a base64 string.""" + extras: NotRequired[dict[str, Any]] + """Provider-specific metadata.""" -class URLContentBlock(BaseDataContentBlock): - """Content block for data from a URL.""" - type: Literal["image", "audio", "file"] - """Type of the content block.""" - source_type: Literal["url"] - """Source type (url).""" - url: str - """URL for data.""" +class VideoContentBlock(TypedDict): + """Video data. + .. note:: + ``create_video_block`` may also be used as a factory to create a + ``VideoContentBlock``. Benefits include: -class Base64ContentBlock(BaseDataContentBlock): - """Content block for inline data from a base64 string.""" + * Automatic ID generation (when not provided) + * Required arguments strictly validated at creation time + + """ + + type: Literal["video"] + """Type of the content block. Used for discrimination.""" + + id: NotRequired[str] + """Content block identifier. 
Either: + + - Generated by the provider (e.g., OpenAI's file ID) + - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ - type: Literal["image", "audio", "file"] - """Type of the content block.""" - source_type: Literal["base64"] - """Source type (base64).""" - data: str + file_id: NotRequired[str] + """ID of the video file, e.g., from a file storage system.""" + + mime_type: NotRequired[str] + """MIME type of the video. Required for base64. + + `Examples from IANA `__ + """ + + index: NotRequired[int] + """Index of block in aggregate response. Used during streaming.""" + + url: NotRequired[str] + """URL of the video.""" + + base64: NotRequired[str] + """Data as a base64 string.""" + + extras: NotRequired[dict[str, Any]] + """Provider-specific metadata.""" + + +class AudioContentBlock(TypedDict): + """Audio data. + + .. note:: + ``create_audio_block`` may also be used as a factory to create an + ``AudioContentBlock``. Benefits include: + * Automatic ID generation (when not provided) + * Required arguments strictly validated at creation time + + """ + + type: Literal["audio"] + """Type of the content block. Used for discrimination.""" + + id: NotRequired[str] + """Content block identifier. Either: + + - Generated by the provider (e.g., OpenAI's file ID) + - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ + + file_id: NotRequired[str] + """ID of the audio file, e.g., from a file storage system.""" + + mime_type: NotRequired[str] + """MIME type of the audio. Required for base64. + + `Examples from IANA `__ + + """ + + index: NotRequired[int] + """Index of block in aggregate response. Used during streaming.""" + + url: NotRequired[str] + """URL of the audio.""" + + base64: NotRequired[str] """Data as a base64 string.""" + extras: NotRequired[dict[str, Any]] + """Provider-specific metadata.""" + -class PlainTextContentBlock(BaseDataContentBlock): - """Content block for plain text data (e.g., from a document).""" +class PlainTextContentBlock(TypedDict): + """Plaintext data (e.g., from a document). + + .. note:: + Title and context are optional fields that may be passed to the model. See + Anthropic `example `__. + + .. note:: + ``create_plaintext_block`` may also be used as a factory to create a + ``PlainTextContentBlock``. Benefits include: + + * Automatic ID generation (when not provided) + * Required arguments strictly validated at creation time + + """ + + type: Literal["text-plain"] + """Type of the content block. Used for discrimination.""" + + id: NotRequired[str] + """Content block identifier. Either: + + - Generated by the provider (e.g., OpenAI's file ID) + - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ + + file_id: NotRequired[str] + """ID of the plaintext file, e.g., from a file storage system.""" + + mime_type: Literal["text/plain"] + """MIME type of the file. Required for base64.""" + + index: NotRequired[int] + """Index of block in aggregate response. Used during streaming.""" + + url: NotRequired[str] + """URL of the plaintext.""" + + base64: NotRequired[str] + """Data as a base64 string.""" + + text: NotRequired[str] + """Plaintext content. 
This is optional if the data is provided as base64.""" + + title: NotRequired[str] + """Title of the text data, e.g., the title of a document.""" + + context: NotRequired[str] + """Context for the text, e.g., a description or summary of the text's content.""" + + extras: NotRequired[dict[str, Any]] + """Provider-specific metadata.""" + + +class FileContentBlock(TypedDict): + """File data that doesn't fit into other multimodal blocks. + + This block is intended for files that are not images, audio, or plaintext. For + example, it can be used for PDFs, Word documents, etc. + + If the file is an image, audio, or plaintext, you should use the corresponding + content block type (e.g., ``ImageContentBlock``, ``AudioContentBlock``, + ``PlainTextContentBlock``). + + .. note:: + ``create_file_block`` may also be used as a factory to create a + ``FileContentBlock``. Benefits include: + + * Automatic ID generation (when not provided) + * Required arguments strictly validated at creation time + + """ type: Literal["file"] - """Type of the content block.""" - source_type: Literal["text"] - """Source type (text).""" - text: str - """Text data.""" + """Type of the content block. Used for discrimination.""" + + id: NotRequired[str] + """Content block identifier. Either: + + - Generated by the provider (e.g., OpenAI's file ID) + - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ + + file_id: NotRequired[str] + """ID of the file, e.g., from a file storage system.""" + + mime_type: NotRequired[str] + """MIME type of the file. Required for base64. + + `Examples from IANA `__ + + """ + + index: NotRequired[int] + """Index of block in aggregate response. Used during streaming.""" + + url: NotRequired[str] + """URL of the file.""" + + base64: NotRequired[str] + """Data as a base64 string.""" + + extras: NotRequired[dict[str, Any]] + """Provider-specific metadata.""" + + +# Future modalities to consider: +# - 3D models +# - Tabular data + + +class NonStandardContentBlock(TypedDict): + """Provider-specific data. + + This block contains data for which there is not yet a standard type. + + The purpose of this block should be to simply hold a provider-specific payload. + If a provider's non-standard output includes reasoning and tool calls, it should be + the adapter's job to parse that payload and emit the corresponding standard + ``ReasoningContentBlock`` and ``ToolCallContentBlocks``. + + .. note:: + ``create_non_standard_block`` may also be used as a factory to create a + ``NonStandardContentBlock``. Benefits include: + + * Automatic ID generation (when not provided) + * Required arguments strictly validated at creation time + + """ + + type: Literal["non_standard"] + """Type of the content block. Used for discrimination.""" + + id: NotRequired[str] + """Content block identifier. Either: + - Generated by the provider (e.g., OpenAI's file ID) + - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ -class IDContentBlock(TypedDict): - """Content block for data specified by an identifier.""" + value: dict[str, Any] + """Provider-specific data.""" - type: Literal["image", "audio", "file"] - """Type of the content block.""" - source_type: Literal["id"] - """Source type (id).""" - id: str - """Identifier for data source.""" + index: NotRequired[int] + """Index of block in aggregate response. 
Used during streaming.""" +# --- Aliases --- DataContentBlock = Union[ - URLContentBlock, - Base64ContentBlock, + ImageContentBlock, + VideoContentBlock, + AudioContentBlock, PlainTextContentBlock, - IDContentBlock, + FileContentBlock, +] + +ToolContentBlock = Union[ + ToolCall, + ToolCallChunk, + CodeInterpreterCall, + CodeInterpreterOutput, + CodeInterpreterResult, + WebSearchCall, + WebSearchResult, ] -_DataContentBlockAdapter: TypeAdapter[DataContentBlock] = TypeAdapter(DataContentBlock) +ContentBlock = Union[ + TextContentBlock, + ToolCall, + ToolCallChunk, + InvalidToolCall, + ReasoningContentBlock, + NonStandardContentBlock, + DataContentBlock, + ToolContentBlock, +] -def is_data_content_block( - content_block: dict, -) -> bool: +KNOWN_BLOCK_TYPES = { + "text", + "text-plain", + "tool_call", + "invalid_tool_call", + "tool_call_chunk", + "reasoning", + "non_standard", + "image", + "audio", + "file", + "video", + "code_interpreter_call", + "code_interpreter_output", + "code_interpreter_result", + "web_search_call", + "web_search_result", +} + + +def is_data_content_block(block: dict) -> bool: """Check if the content block is a standard data content block. Args: - content_block: The content block to check. + block: The content block to check. Returns: True if the content block is a data content block, False otherwise. """ - try: - _ = _DataContentBlockAdapter.validate_python(content_block) - except ValidationError: - return False - else: - return True + return block.get("type") in ( + "audio", + "image", + "video", + "file", + "text-plain", + ) and any( + key in block + for key in ( + "url", + "base64", + "file_id", + "text", + "source_type", # backwards compatibility + ) + ) + + +def is_tool_call_block(block: ContentBlock) -> TypeGuard[ToolCall]: + """Type guard to check if a content block is a ``ToolCall``.""" + return block.get("type") == "tool_call" + + +def is_tool_call_chunk(block: ContentBlock) -> TypeGuard[ToolCallChunk]: + """Type guard to check if a content block is a ``ToolCallChunk``.""" + return block.get("type") == "tool_call_chunk" + + +def is_text_block(block: ContentBlock) -> TypeGuard[TextContentBlock]: + """Type guard to check if a content block is a ``TextContentBlock``.""" + return block.get("type") == "text" + +def is_reasoning_block(block: ContentBlock) -> TypeGuard[ReasoningContentBlock]: + """Type guard to check if a content block is a ``ReasoningContentBlock``.""" + return block.get("type") == "reasoning" -def convert_to_openai_image_block(content_block: dict[str, Any]) -> dict: + +def is_invalid_tool_call_block( + block: ContentBlock, +) -> TypeGuard[InvalidToolCall]: + """Type guard to check if a content block is an ``InvalidToolCall``.""" + return block.get("type") == "invalid_tool_call" + + +def convert_to_openai_image_block(block: dict[str, Any]) -> dict: """Convert image content block to format expected by OpenAI Chat Completions API.""" - if content_block["source_type"] == "url": + if "url" in block: return { "type": "image_url", "image_url": { - "url": content_block["url"], + "url": block["url"], }, } - if content_block["source_type"] == "base64": - if "mime_type" not in content_block: + if "base64" in block or block.get("source_type") == "base64": + if "mime_type" not in block: error_message = "mime_type key is required for base64 data." 
raise ValueError(error_message) - mime_type = content_block["mime_type"] + mime_type = block["mime_type"] + base64_data = block["data"] if "data" in block else block["base64"] return { "type": "image_url", "image_url": { - "url": f"data:{mime_type};base64,{content_block['data']}", + "url": f"data:{mime_type};base64,{base64_data}", }, } error_message = "Unsupported source type. Only 'url' and 'base64' are supported." @@ -117,39 +974,493 @@ def convert_to_openai_data_block(block: dict) -> dict: formatted_block = convert_to_openai_image_block(block) elif block["type"] == "file": - if block["source_type"] == "base64": - file = {"file_data": f"data:{block['mime_type']};base64,{block['data']}"} + if "base64" in block or block.get("source_type") == "base64": + base64_data = block["data"] if "source_type" in block else block["base64"] + file = {"file_data": f"data:{block['mime_type']};base64,{base64_data}"} if filename := block.get("filename"): file["filename"] = filename - elif (metadata := block.get("metadata")) and ("filename" in metadata): - file["filename"] = metadata["filename"] + elif (extras := block.get("extras")) and ("filename" in extras): + file["filename"] = extras["filename"] + elif (extras := block.get("metadata")) and ("filename" in extras): + # Backward compat + file["filename"] = extras["filename"] else: warnings.warn( "OpenAI may require a filename for file inputs. Specify a filename " - "in the content block: {'type': 'file', 'source_type': 'base64', " - "'mime_type': 'application/pdf', 'data': '...', " - "'filename': 'my-pdf'}", + "in the content block: {'type': 'file', 'mime_type': " + "'application/pdf', 'base64': '...', 'filename': 'my-pdf'}", stacklevel=1, ) formatted_block = {"type": "file", "file": file} - elif block["source_type"] == "id": - formatted_block = {"type": "file", "file": {"file_id": block["id"]}} + elif "file_id" in block or block.get("source_type") == "id": + file_id = block["id"] if "source_type" in block else block["file_id"] + formatted_block = {"type": "file", "file": {"file_id": file_id}} else: - error_msg = "source_type base64 or id is required for file blocks." + error_msg = "Keys base64 or file_id required for file blocks." raise ValueError(error_msg) elif block["type"] == "audio": - if block["source_type"] == "base64": + if "base64" in block or block.get("source_type") == "base64": + base64_data = block["data"] if "source_type" in block else block["base64"] audio_format = block["mime_type"].split("/")[-1] formatted_block = { "type": "input_audio", - "input_audio": {"data": block["data"], "format": audio_format}, + "input_audio": {"data": base64_data, "format": audio_format}, } else: - error_msg = "source_type base64 is required for audio blocks." + error_msg = "Key base64 is required for audio blocks." raise ValueError(error_msg) else: error_msg = f"Block of type {block['type']} is not supported." raise ValueError(error_msg) return formatted_block + + +def create_text_block( + text: str, + *, + id: Optional[str] = None, + annotations: Optional[list[Annotation]] = None, + index: Optional[int] = None, +) -> TextContentBlock: + """Create a ``TextContentBlock``. + + Args: + text: The text content of the block. + id: Content block identifier. Generated automatically if not provided. + annotations: ``Citation``s and other annotations for the text. + index: Index of block in aggregate response. Used during streaming. + + Returns: + A properly formatted ``TextContentBlock``. + + .. 
note:: + The ``id`` is generated automatically if not provided, using a UUID4 format + prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID. + + """ + block = TextContentBlock( + type="text", + text=text, + id=_ensure_id(id), + ) + if annotations is not None: + block["annotations"] = annotations + if index is not None: + block["index"] = index + return block + + +def create_image_block( + *, + url: Optional[str] = None, + base64: Optional[str] = None, + file_id: Optional[str] = None, + mime_type: Optional[str] = None, + id: Optional[str] = None, + index: Optional[int] = None, +) -> ImageContentBlock: + """Create an ``ImageContentBlock``. + + Args: + url: URL of the image. + base64: Base64-encoded image data. + file_id: ID of the image file from a file storage system. + mime_type: MIME type of the image. Required for base64 data. + id: Content block identifier. Generated automatically if not provided. + index: Index of block in aggregate response. Used during streaming. + + Returns: + A properly formatted ``ImageContentBlock``. + + Raises: + ValueError: If no image source is provided or if ``base64`` is used without + ``mime_type``. + + .. note:: + The ``id`` is generated automatically if not provided, using a UUID4 format + prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID. + + """ + if not any([url, base64, file_id]): + msg = "Must provide one of: url, base64, or file_id" + raise ValueError(msg) + + block = ImageContentBlock(type="image", id=_ensure_id(id)) + + if url is not None: + block["url"] = url + if base64 is not None: + block["base64"] = base64 + if file_id is not None: + block["file_id"] = file_id + if mime_type is not None: + block["mime_type"] = mime_type + if index is not None: + block["index"] = index + + return block + + +def create_video_block( + *, + url: Optional[str] = None, + base64: Optional[str] = None, + file_id: Optional[str] = None, + mime_type: Optional[str] = None, + id: Optional[str] = None, + index: Optional[int] = None, +) -> VideoContentBlock: + """Create a ``VideoContentBlock``. + + Args: + url: URL of the video. + base64: Base64-encoded video data. + file_id: ID of the video file from a file storage system. + mime_type: MIME type of the video. Required for base64 data. + id: Content block identifier. Generated automatically if not provided. + index: Index of block in aggregate response. Used during streaming. + + Returns: + A properly formatted ``VideoContentBlock``. + + Raises: + ValueError: If no video source is provided or if ``base64`` is used without + ``mime_type``. + + .. note:: + The ``id`` is generated automatically if not provided, using a UUID4 format + prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID. 
+ + """ + if not any([url, base64, file_id]): + msg = "Must provide one of: url, base64, or file_id" + raise ValueError(msg) + + if base64 and not mime_type: + msg = "mime_type is required when using base64 data" + raise ValueError(msg) + + block = VideoContentBlock(type="video", id=_ensure_id(id)) + + if url is not None: + block["url"] = url + if base64 is not None: + block["base64"] = base64 + if file_id is not None: + block["file_id"] = file_id + if mime_type is not None: + block["mime_type"] = mime_type + if index is not None: + block["index"] = index + + return block + + +def create_audio_block( + *, + url: Optional[str] = None, + base64: Optional[str] = None, + file_id: Optional[str] = None, + mime_type: Optional[str] = None, + id: Optional[str] = None, + index: Optional[int] = None, +) -> AudioContentBlock: + """Create an ``AudioContentBlock``. + + Args: + url: URL of the audio. + base64: Base64-encoded audio data. + file_id: ID of the audio file from a file storage system. + mime_type: MIME type of the audio. Required for base64 data. + id: Content block identifier. Generated automatically if not provided. + index: Index of block in aggregate response. Used during streaming. + + Returns: + A properly formatted ``AudioContentBlock``. + + Raises: + ValueError: If no audio source is provided or if ``base64`` is used without + ``mime_type``. + + .. note:: + The ``id`` is generated automatically if not provided, using a UUID4 format + prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID. + + """ + if not any([url, base64, file_id]): + msg = "Must provide one of: url, base64, or file_id" + raise ValueError(msg) + + if base64 and not mime_type: + msg = "mime_type is required when using base64 data" + raise ValueError(msg) + + block = AudioContentBlock(type="audio", id=_ensure_id(id)) + + if url is not None: + block["url"] = url + if base64 is not None: + block["base64"] = base64 + if file_id is not None: + block["file_id"] = file_id + if mime_type is not None: + block["mime_type"] = mime_type + if index is not None: + block["index"] = index + + return block + + +def create_file_block( + *, + url: Optional[str] = None, + base64: Optional[str] = None, + file_id: Optional[str] = None, + mime_type: Optional[str] = None, + id: Optional[str] = None, + index: Optional[int] = None, +) -> FileContentBlock: + """Create a ``FileContentBlock``. + + Args: + url: URL of the file. + base64: Base64-encoded file data. + file_id: ID of the file from a file storage system. + mime_type: MIME type of the file. Required for base64 data. + id: Content block identifier. Generated automatically if not provided. + index: Index of block in aggregate response. Used during streaming. + + Returns: + A properly formatted ``FileContentBlock``. + + Raises: + ValueError: If no file source is provided or if ``base64`` is used without + ``mime_type``. + + .. note:: + The ``id`` is generated automatically if not provided, using a UUID4 format + prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID. 
+ + """ + if not any([url, base64, file_id]): + msg = "Must provide one of: url, base64, or file_id" + raise ValueError(msg) + + if base64 and not mime_type: + msg = "mime_type is required when using base64 data" + raise ValueError(msg) + + block = FileContentBlock(type="file", id=_ensure_id(id)) + + if url is not None: + block["url"] = url + if base64 is not None: + block["base64"] = base64 + if file_id is not None: + block["file_id"] = file_id + if mime_type is not None: + block["mime_type"] = mime_type + if index is not None: + block["index"] = index + + return block + + +def create_plaintext_block( + text: Optional[str] = None, + url: Optional[str] = None, + base64: Optional[str] = None, + file_id: Optional[str] = None, + title: Optional[str] = None, + context: Optional[str] = None, + id: Optional[str] = None, + index: Optional[int] = None, +) -> PlainTextContentBlock: + """Create a ``PlainTextContentBlock``. + + Args: + text: The plaintext content. + url: URL of the plaintext file. + base64: Base64-encoded plaintext data. + file_id: ID of the plaintext file from a file storage system. + title: Title of the text data. + context: Context or description of the text content. + id: Content block identifier. Generated automatically if not provided. + index: Index of block in aggregate response. Used during streaming. + + Returns: + A properly formatted ``PlainTextContentBlock``. + + .. note:: + The ``id`` is generated automatically if not provided, using a UUID4 format + prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID. + + """ + block = PlainTextContentBlock( + type="text-plain", + mime_type="text/plain", + id=_ensure_id(id), + ) + + if text is not None: + block["text"] = text + if url is not None: + block["url"] = url + if base64 is not None: + block["base64"] = base64 + if file_id is not None: + block["file_id"] = file_id + if title is not None: + block["title"] = title + if context is not None: + block["context"] = context + if index is not None: + block["index"] = index + + return block + + +def create_tool_call( + name: str, + args: dict[str, Any], + *, + id: Optional[str] = None, + index: Optional[int] = None, +) -> ToolCall: + """Create a ``ToolCall``. + + Args: + name: The name of the tool to be called. + args: The arguments to the tool call. + id: An identifier for the tool call. Generated automatically if not provided. + index: Index of block in aggregate response. Used during streaming. + + Returns: + A properly formatted ``ToolCall``. + + .. note:: + The ``id`` is generated automatically if not provided, using a UUID4 format + prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID. + + """ + block = ToolCall( + type="tool_call", + name=name, + args=args, + id=_ensure_id(id), + ) + + if index is not None: + block["index"] = index + + return block + + +def create_reasoning_block( + reasoning: Optional[str] = None, + id: Optional[str] = None, + index: Optional[int] = None, +) -> ReasoningContentBlock: + """Create a ``ReasoningContentBlock``. + + Args: + reasoning: The reasoning text or thought summary. + id: Content block identifier. Generated automatically if not provided. + index: Index of block in aggregate response. Used during streaming. + + Returns: + A properly formatted ``ReasoningContentBlock``. + + .. note:: + The ``id`` is generated automatically if not provided, using a UUID4 format + prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID. 
+ + """ + block = ReasoningContentBlock( + type="reasoning", + reasoning=reasoning or "", + id=_ensure_id(id), + ) + + if index is not None: + block["index"] = index + + return block + + +def create_citation( + *, + url: Optional[str] = None, + title: Optional[str] = None, + start_index: Optional[int] = None, + end_index: Optional[int] = None, + cited_text: Optional[str] = None, + id: Optional[str] = None, +) -> Citation: + """Create a ``Citation``. + + Args: + url: URL of the document source. + title: Source document title. + start_index: Start index in the response text where citation applies. + end_index: End index in the response text where citation applies. + cited_text: Excerpt of source text being cited. + id: Content block identifier. Generated automatically if not provided. + + Returns: + A properly formatted ``Citation``. + + .. note:: + The ``id`` is generated automatically if not provided, using a UUID4 format + prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID. + + """ + block = Citation(type="citation", id=_ensure_id(id)) + + if url is not None: + block["url"] = url + if title is not None: + block["title"] = title + if start_index is not None: + block["start_index"] = start_index + if end_index is not None: + block["end_index"] = end_index + if cited_text is not None: + block["cited_text"] = cited_text + + return block + + +def create_non_standard_block( + value: dict[str, Any], + *, + id: Optional[str] = None, + index: Optional[int] = None, +) -> NonStandardContentBlock: + """Create a ``NonStandardContentBlock``. + + Args: + value: Provider-specific data. + id: Content block identifier. Generated automatically if not provided. + index: Index of block in aggregate response. Used during streaming. + + Returns: + A properly formatted ``NonStandardContentBlock``. + + .. note:: + The ``id`` is generated automatically if not provided, using a UUID4 format + prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID. + + """ + block = NonStandardContentBlock( + type="non_standard", + value=value, + id=_ensure_id(id), + ) + + if index is not None: + block["index"] = index + + return block diff --git a/libs/core/langchain_core/messages/modifier.py b/libs/core/langchain_core/messages/modifier.py index 08b7e79b69cc6..5f1602a4908d3 100644 --- a/libs/core/langchain_core/messages/modifier.py +++ b/libs/core/langchain_core/messages/modifier.py @@ -13,7 +13,7 @@ class RemoveMessage(BaseMessage): def __init__( self, - id: str, # noqa: A002 + id: str, **kwargs: Any, ) -> None: """Create a RemoveMessage. 
diff --git a/libs/core/langchain_core/messages/tool.py b/libs/core/langchain_core/messages/tool.py index 1f8a519a7dc24..181c80443d56f 100644 --- a/libs/core/langchain_core/messages/tool.py +++ b/libs/core/langchain_core/messages/tool.py @@ -5,9 +5,12 @@ from uuid import UUID from pydantic import Field, model_validator -from typing_extensions import NotRequired, TypedDict, override +from typing_extensions import override from langchain_core.messages.base import BaseMessage, BaseMessageChunk, merge_content +from langchain_core.messages.content_blocks import InvalidToolCall as InvalidToolCall +from langchain_core.messages.content_blocks import ToolCall as ToolCall +from langchain_core.messages.content_blocks import ToolCallChunk as ToolCallChunk from langchain_core.utils._merge import merge_dicts, merge_obj @@ -177,42 +180,11 @@ def __add__(self, other: Any) -> BaseMessageChunk: # type: ignore[override] return super().__add__(other) -class ToolCall(TypedDict): - """Represents a request to call a tool. - - Example: - - .. code-block:: python - - { - "name": "foo", - "args": {"a": 1}, - "id": "123" - } - - This represents a request to call the tool named "foo" with arguments {"a": 1} - and an identifier of "123". - - """ - - name: str - """The name of the tool to be called.""" - args: dict[str, Any] - """The arguments to the tool call.""" - id: Optional[str] - """An identifier associated with the tool call. - - An identifier is needed to associate a tool call request with a tool - call result in events when multiple concurrent tool calls are made. - """ - type: NotRequired[Literal["tool_call"]] - - def tool_call( *, name: str, args: dict[str, Any], - id: Optional[str], # noqa: A002 + id: Optional[str], ) -> ToolCall: """Create a tool call. @@ -224,43 +196,11 @@ def tool_call( return ToolCall(name=name, args=args, id=id, type="tool_call") -class ToolCallChunk(TypedDict): - """A chunk of a tool call (e.g., as part of a stream). - - When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), - all string attributes are concatenated. Chunks are only merged if their - values of `index` are equal and not None. - - Example: - - .. code-block:: python - - left_chunks = [ToolCallChunk(name="foo", args='{"a":', index=0)] - right_chunks = [ToolCallChunk(name=None, args='1}', index=0)] - - ( - AIMessageChunk(content="", tool_call_chunks=left_chunks) - + AIMessageChunk(content="", tool_call_chunks=right_chunks) - ).tool_call_chunks == [ToolCallChunk(name='foo', args='{"a":1}', index=0)] - - """ - - name: Optional[str] - """The name of the tool to be called.""" - args: Optional[str] - """The arguments to the tool call.""" - id: Optional[str] - """An identifier associated with the tool call.""" - index: Optional[int] - """The index of the tool call in a sequence.""" - type: NotRequired[Literal["tool_call_chunk"]] - - def tool_call_chunk( *, name: Optional[str] = None, args: Optional[str] = None, - id: Optional[str] = None, # noqa: A002 + id: Optional[str] = None, index: Optional[int] = None, ) -> ToolCallChunk: """Create a tool call chunk. @@ -276,29 +216,11 @@ def tool_call_chunk( ) -class InvalidToolCall(TypedDict): - """Allowance for errors made by LLM. - - Here we add an `error` key to surface errors made during generation - (e.g., invalid JSON arguments.) 
- """ - - name: Optional[str] - """The name of the tool to be called.""" - args: Optional[str] - """The arguments to the tool call.""" - id: Optional[str] - """An identifier associated with the tool call.""" - error: Optional[str] - """An error message associated with the tool call.""" - type: NotRequired[Literal["invalid_tool_call"]] - - def invalid_tool_call( *, name: Optional[str] = None, args: Optional[str] = None, - id: Optional[str] = None, # noqa: A002 + id: Optional[str] = None, error: Optional[str] = None, ) -> InvalidToolCall: """Create an invalid tool call. diff --git a/libs/core/langchain_core/messages/utils.py b/libs/core/langchain_core/messages/utils.py index 37f16c632bd06..e84dc6c0191ed 100644 --- a/libs/core/langchain_core/messages/utils.py +++ b/libs/core/langchain_core/messages/utils.py @@ -213,7 +213,7 @@ def _create_message_from_message_type( name: Optional[str] = None, tool_call_id: Optional[str] = None, tool_calls: Optional[list[dict[str, Any]]] = None, - id: Optional[str] = None, # noqa: A002 + id: Optional[str] = None, **additional_kwargs: Any, ) -> BaseMessage: """Create a message from a message type and content string. diff --git a/libs/core/langchain_core/output_parsers/transform.py b/libs/core/langchain_core/output_parsers/transform.py index 876e66b555669..0c864805b9335 100644 --- a/libs/core/langchain_core/output_parsers/transform.py +++ b/libs/core/langchain_core/output_parsers/transform.py @@ -32,7 +32,7 @@ class BaseTransformOutputParser(BaseOutputParser[T]): def _transform( self, - input: Iterator[Union[str, BaseMessage]], # noqa: A002 + input: Iterator[Union[str, BaseMessage]], ) -> Iterator[T]: for chunk in input: if isinstance(chunk, BaseMessage): @@ -42,7 +42,7 @@ def _transform( async def _atransform( self, - input: AsyncIterator[Union[str, BaseMessage]], # noqa: A002 + input: AsyncIterator[Union[str, BaseMessage]], ) -> AsyncIterator[T]: async for chunk in input: if isinstance(chunk, BaseMessage): diff --git a/libs/core/langchain_core/runnables/base.py b/libs/core/langchain_core/runnables/base.py index 6e0be997af6ef..17024df7bb766 100644 --- a/libs/core/langchain_core/runnables/base.py +++ b/libs/core/langchain_core/runnables/base.py @@ -728,7 +728,7 @@ def assign( @abstractmethod def invoke( self, - input: Input, # noqa: A002 + input: Input, config: Optional[RunnableConfig] = None, **kwargs: Any, ) -> Output: @@ -748,7 +748,7 @@ def invoke( async def ainvoke( self, - input: Input, # noqa: A002 + input: Input, config: Optional[RunnableConfig] = None, **kwargs: Any, ) -> Output: @@ -996,7 +996,7 @@ async def ainvoke_task( def stream( self, - input: Input, # noqa: A002 + input: Input, config: Optional[RunnableConfig] = None, **kwargs: Optional[Any], ) -> Iterator[Output]: @@ -1016,7 +1016,7 @@ def stream( async def astream( self, - input: Input, # noqa: A002 + input: Input, config: Optional[RunnableConfig] = None, **kwargs: Optional[Any], ) -> AsyncIterator[Output]: @@ -1070,7 +1070,7 @@ def astream_log( async def astream_log( self, - input: Any, # noqa: A002 + input: Any, config: Optional[RunnableConfig] = None, *, diff: bool = True, @@ -1141,7 +1141,7 @@ async def astream_log( async def astream_events( self, - input: Any, # noqa: A002 + input: Any, config: Optional[RunnableConfig] = None, *, version: Literal["v1", "v2"] = "v2", @@ -1407,7 +1407,7 @@ async def slow_thing(some_input: str, config: RunnableConfig) -> str: def transform( self, - input: Iterator[Input], # noqa: A002 + input: Iterator[Input], config: Optional[RunnableConfig] = None, 
**kwargs: Optional[Any], ) -> Iterator[Output]: @@ -1449,7 +1449,7 @@ def transform( async def atransform( self, - input: AsyncIterator[Input], # noqa: A002 + input: AsyncIterator[Input], config: Optional[RunnableConfig] = None, **kwargs: Optional[Any], ) -> AsyncIterator[Output]: diff --git a/libs/core/langchain_core/runnables/config.py b/libs/core/langchain_core/runnables/config.py index 4ac7bda7b4654..cc36622b914bf 100644 --- a/libs/core/langchain_core/runnables/config.py +++ b/libs/core/langchain_core/runnables/config.py @@ -402,7 +402,7 @@ def call_func_with_variable_args( Callable[[Input, CallbackManagerForChainRun], Output], Callable[[Input, CallbackManagerForChainRun, RunnableConfig], Output], ], - input: Input, # noqa: A002 + input: Input, config: RunnableConfig, run_manager: Optional[CallbackManagerForChainRun] = None, **kwargs: Any, @@ -439,7 +439,7 @@ def acall_func_with_variable_args( Awaitable[Output], ], ], - input: Input, # noqa: A002 + input: Input, config: RunnableConfig, run_manager: Optional[AsyncCallbackManagerForChainRun] = None, **kwargs: Any, diff --git a/libs/core/langchain_core/runnables/graph.py b/libs/core/langchain_core/runnables/graph.py index 3e22494bad7fb..20a841d51a84f 100644 --- a/libs/core/langchain_core/runnables/graph.py +++ b/libs/core/langchain_core/runnables/graph.py @@ -114,7 +114,7 @@ class Node(NamedTuple): def copy( self, *, - id: Optional[str] = None, # noqa: A002 + id: Optional[str] = None, name: Optional[str] = None, ) -> Node: """Return a copy of the node with optional new id and name. @@ -187,7 +187,7 @@ class MermaidDrawMethod(Enum): def node_data_str( - id: str, # noqa: A002 + id: str, data: Union[type[BaseModel], RunnableType, None], ) -> str: """Convert the data of a node to a string. @@ -328,7 +328,7 @@ def next_id(self) -> str: def add_node( self, data: Union[type[BaseModel], RunnableType, None], - id: Optional[str] = None, # noqa: A002 + id: Optional[str] = None, *, metadata: Optional[dict[str, Any]] = None, ) -> Node: diff --git a/libs/core/langchain_core/utils/function_calling.py b/libs/core/langchain_core/utils/function_calling.py index 609129ac58b1b..b6e04c9abae88 100644 --- a/libs/core/langchain_core/utils/function_calling.py +++ b/libs/core/langchain_core/utils/function_calling.py @@ -627,7 +627,7 @@ def convert_to_json_schema( @beta() def tool_example_to_messages( - input: str, # noqa: A002 + input: str, tool_calls: list[BaseModel], tool_outputs: Optional[list[str]] = None, *, diff --git a/libs/core/pyproject.toml b/libs/core/pyproject.toml index e329e0d3c5cf6..a6e5ed777acc2 100644 --- a/libs/core/pyproject.toml +++ b/libs/core/pyproject.toml @@ -86,6 +86,7 @@ ignore = [ "FIX002", # Line contains TODO "ISC001", # Messes with the formatter "PERF203", # Rarely useful + "PLC0414", # Enable re-export "PLR09", # Too many something (arg, statements, etc) "RUF012", # Doesn't play well with Pydantic "TC001", # Doesn't play well with Pydantic @@ -105,6 +106,7 @@ unfixable = ["PLW1510",] flake8-annotations.allow-star-arg-any = true flake8-annotations.mypy-init-return = true +flake8-builtins.ignorelist = ["id", "input", "type"] flake8-type-checking.runtime-evaluated-base-classes = ["pydantic.BaseModel","langchain_core.load.serializable.Serializable","langchain_core.runnables.base.RunnableSerializable"] pep8-naming.classmethod-decorators = [ "classmethod", "langchain_core.utils.pydantic.pre_init", "pydantic.field_validator", "pydantic.v1.root_validator",] pydocstyle.convention = "google" diff --git 
a/libs/core/tests/unit_tests/messages/test_imports.py b/libs/core/tests/unit_tests/messages/test_imports.py index ff9fbf92fc77e..750f2f49f060d 100644 --- a/libs/core/tests/unit_tests/messages/test_imports.py +++ b/libs/core/tests/unit_tests/messages/test_imports.py @@ -5,26 +5,48 @@ "_message_from_dict", "AIMessage", "AIMessageChunk", + "Annotation", "AnyMessage", + "AudioContentBlock", "BaseMessage", "BaseMessageChunk", + "ContentBlock", "ChatMessage", "ChatMessageChunk", + "Citation", + "CodeInterpreterCall", + "CodeInterpreterOutput", + "CodeInterpreterResult", + "DataContentBlock", + "FileContentBlock", "FunctionMessage", "FunctionMessageChunk", "HumanMessage", "HumanMessageChunk", + "ImageContentBlock", "InvalidToolCall", + "NonStandardAnnotation", + "NonStandardContentBlock", + "PlainTextContentBlock", "SystemMessage", "SystemMessageChunk", + "TextContentBlock", "ToolCall", "ToolCallChunk", "ToolMessage", "ToolMessageChunk", + "VideoContentBlock", + "WebSearchCall", + "WebSearchResult", + "ReasoningContentBlock", "RemoveMessage", "convert_to_messages", "get_buffer_string", "is_data_content_block", + "is_reasoning_block", + "is_text_block", + "is_tool_call_block", + "is_tool_call_chunk", "merge_content", "message_chunk_to_message", "message_to_dict", diff --git a/libs/core/tests/unit_tests/messages/test_utils.py b/libs/core/tests/unit_tests/messages/test_utils.py index bedd518589ea0..f9f1c9c9ff081 100644 --- a/libs/core/tests/unit_tests/messages/test_utils.py +++ b/libs/core/tests/unit_tests/messages/test_utils.py @@ -1221,15 +1221,30 @@ def test_convert_to_openai_messages_multimodal() -> None: {"type": "text", "text": "Text message"}, { "type": "image", - "source_type": "url", "url": "https://example.com/test.png", }, + { + "type": "image", + "source_type": "url", # backward compatibility + "url": "https://example.com/test.png", + }, + { + "type": "image", + "base64": "", + "mime_type": "image/png", + }, { "type": "image", "source_type": "base64", "data": "", "mime_type": "image/png", }, + { + "type": "file", + "base64": "", + "mime_type": "application/pdf", + "filename": "test.pdf", + }, { "type": "file", "source_type": "base64", @@ -1244,11 +1259,20 @@ def test_convert_to_openai_messages_multimodal() -> None: "file_data": "data:application/pdf;base64,", }, }, + { + "type": "file", + "file_id": "file-abc123", + }, { "type": "file", "source_type": "id", "id": "file-abc123", }, + { + "type": "audio", + "base64": "", + "mime_type": "audio/wav", + }, { "type": "audio", "source_type": "base64", @@ -1268,7 +1292,7 @@ def test_convert_to_openai_messages_multimodal() -> None: result = convert_to_openai_messages(messages, text_format="block") assert len(result) == 1 message = result[0] - assert len(message["content"]) == 8 + assert len(message["content"]) == 13 # Test adding filename messages = [ @@ -1276,8 +1300,7 @@ def test_convert_to_openai_messages_multimodal() -> None: content=[ { "type": "file", - "source_type": "base64", - "data": "", + "base64": "", "mime_type": "application/pdf", }, ] diff --git a/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr b/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr index 7c07416fe5d9c..f45b2f7dae9b6 100644 --- a/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr +++ b/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr @@ -726,7 +726,7 @@ 'description': ''' Allowance for errors made by LLM. 
- Here we add an `error` key to surface errors made during generation + Here we add an ``error`` key to surface errors made during generation (e.g., invalid JSON arguments.) ''', 'properties': dict({ @@ -752,6 +752,10 @@ ]), 'title': 'Error', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -763,6 +767,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'anyOf': list([ dict({ @@ -781,9 +789,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', 'error', ]), 'title': 'InvalidToolCall', @@ -998,12 +1007,23 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". + + .. note:: + ``create_tool_call`` may also be used as a factory to create a + ``ToolCall``. Benefits include: + + * Automatic ID generation (when not provided) + * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -1015,6 +1035,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -1026,9 +1050,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', ]), 'title': 'ToolCall', 'type': 'object', @@ -1037,9 +1062,9 @@ 'description': ''' A chunk of a tool call (e.g., as part of a stream). - When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), + When merging ``ToolCallChunks`` (e.g., via ``AIMessageChunk.__add__``), all string attributes are concatenated. Chunks are only merged if their - values of `index` are equal and not None. + values of ``index`` are equal and not ``None``. Example: @@ -1065,6 +1090,10 @@ ]), 'title': 'Args', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -1105,9 +1134,9 @@ }), }), 'required': list([ + 'id', 'name', 'args', - 'id', 'index', ]), 'title': 'ToolCallChunk', @@ -2158,7 +2187,7 @@ 'description': ''' Allowance for errors made by LLM. - Here we add an `error` key to surface errors made during generation + Here we add an ``error`` key to surface errors made during generation (e.g., invalid JSON arguments.) ''', 'properties': dict({ @@ -2184,6 +2213,10 @@ ]), 'title': 'Error', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -2195,6 +2228,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'anyOf': list([ dict({ @@ -2213,9 +2250,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', 'error', ]), 'title': 'InvalidToolCall', @@ -2430,12 +2468,23 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". + + .. note:: + ``create_tool_call`` may also be used as a factory to create a + ``ToolCall``. 
Benefits include: + + * Automatic ID generation (when not provided) + * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -2447,6 +2496,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -2458,9 +2511,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', ]), 'title': 'ToolCall', 'type': 'object', @@ -2469,9 +2523,9 @@ 'description': ''' A chunk of a tool call (e.g., as part of a stream). - When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), + When merging ``ToolCallChunks`` (e.g., via ``AIMessageChunk.__add__``), all string attributes are concatenated. Chunks are only merged if their - values of `index` are equal and not None. + values of ``index`` are equal and not ``None``. Example: @@ -2497,6 +2551,10 @@ ]), 'title': 'Args', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -2537,9 +2595,9 @@ }), }), 'required': list([ + 'id', 'name', 'args', - 'id', 'index', ]), 'title': 'ToolCallChunk', diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr index a788c425fced3..1a5bcc93dad87 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr @@ -1129,7 +1129,7 @@ 'description': ''' Allowance for errors made by LLM. - Here we add an `error` key to surface errors made during generation + Here we add an ``error`` key to surface errors made during generation (e.g., invalid JSON arguments.) ''', 'properties': dict({ @@ -1155,6 +1155,10 @@ ]), 'title': 'Error', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -1166,6 +1170,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'anyOf': list([ dict({ @@ -1184,9 +1192,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', 'error', ]), 'title': 'InvalidToolCall', @@ -1401,12 +1410,23 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". + + .. note:: + ``create_tool_call`` may also be used as a factory to create a + ``ToolCall``. Benefits include: + + * Automatic ID generation (when not provided) + * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -1418,6 +1438,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -1429,9 +1453,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', ]), 'title': 'ToolCall', 'type': 'object', @@ -1440,9 +1465,9 @@ 'description': ''' A chunk of a tool call (e.g., as part of a stream). - When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), + When merging ``ToolCallChunks`` (e.g., via ``AIMessageChunk.__add__``), all string attributes are concatenated. Chunks are only merged if their - values of `index` are equal and not None. 
+ values of ``index`` are equal and not ``None``. Example: @@ -1468,6 +1493,10 @@ ]), 'title': 'Args', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -1508,9 +1537,9 @@ }), }), 'required': list([ + 'id', 'name', 'args', - 'id', 'index', ]), 'title': 'ToolCallChunk', diff --git a/libs/core/tests/unit_tests/test_messages.py b/libs/core/tests/unit_tests/test_messages.py index 0656a2f2e974b..89c77414dc0b8 100644 --- a/libs/core/tests/unit_tests/test_messages.py +++ b/libs/core/tests/unit_tests/test_messages.py @@ -3,6 +3,7 @@ from typing import Optional, Union import pytest +from typing_extensions import get_args from langchain_core.documents import Document from langchain_core.load import dumpd, load @@ -30,10 +31,16 @@ messages_from_dict, messages_to_dict, ) +from langchain_core.messages.content_blocks import KNOWN_BLOCK_TYPES, ContentBlock from langchain_core.messages.tool import invalid_tool_call as create_invalid_tool_call from langchain_core.messages.tool import tool_call as create_tool_call from langchain_core.messages.tool import tool_call_chunk as create_tool_call_chunk from langchain_core.utils._merge import merge_lists +from langchain_core.v1.messages import AIMessage as AIMessageV1 +from langchain_core.v1.messages import AIMessageChunk as AIMessageChunkV1 +from langchain_core.v1.messages import HumanMessage as HumanMessageV1 +from langchain_core.v1.messages import SystemMessage as SystemMessageV1 +from langchain_core.v1.messages import ToolMessage as ToolMessageV1 def test_message_init() -> None: @@ -181,20 +188,170 @@ def test_message_chunks() -> None: # Test ID order of precedence null_id = AIMessageChunk(content="", id=None) default_id = AIMessageChunk( - content="", id="run-abc123" + content="", id="lc_run--abc123" ) # LangChain-assigned run ID meaningful_id = AIMessageChunk(content="", id="msg_def456") # provider-assigned ID - assert (null_id + default_id).id == "run-abc123" - assert (default_id + null_id).id == "run-abc123" + assert (null_id + default_id).id == "lc_run--abc123" + assert (default_id + null_id).id == "lc_run--abc123" assert (null_id + meaningful_id).id == "msg_def456" assert (meaningful_id + null_id).id == "msg_def456" + # Provider assigned IDs have highest precedence assert (default_id + meaningful_id).id == "msg_def456" assert (meaningful_id + default_id).id == "msg_def456" +def test_message_chunks_v1() -> None: + left = AIMessageChunkV1("foo ", id="abc") + right = AIMessageChunkV1("bar") + expected = AIMessageChunkV1("foo bar", id="abc") + assert left + right == expected + + # Test tool calls + one = AIMessageChunkV1( + [], + tool_call_chunks=[ + create_tool_call_chunk(name="tool1", args="", id="1", index=0) + ], + ) + two = AIMessageChunkV1( + [], + tool_call_chunks=[ + create_tool_call_chunk(name=None, args='{"arg1": "val', id=None, index=0) + ], + ) + three = AIMessageChunkV1( + [], + tool_call_chunks=[ + create_tool_call_chunk(name=None, args='ue}"', id=None, index=0) + ], + ) + result = one + two + three + expected = AIMessageChunkV1( + [], + tool_call_chunks=[ + create_tool_call_chunk( + name="tool1", args='{"arg1": "value}"', id="1", index=0 + ) + ], + id=result.id, # Use the same ID as the result + ) + assert result == expected + + converted_message = result.to_message() + assert converted_message == AIMessageV1( + content=[ + { + "name": "tool1", + "args": {"arg1": "value}"}, + "id": "1", + "type": "tool_call", + } + ], + id=converted_message.id, # Use the same ID as the converted message 
+ ) + + chunk1 = AIMessageChunkV1( + [], + tool_call_chunks=[ + create_tool_call_chunk(name="tool1", args="", id="1", index=0) + ], + ) + chunk2 = AIMessageChunkV1( + [], + tool_call_chunks=[ + create_tool_call_chunk(name="tool1", args="a", id=None, index=1) + ], + ) + # Don't merge if `index` field does not match. + merge_result = chunk1 + chunk2 + assert merge_result == AIMessageChunkV1( + [], + tool_call_chunks=[ + create_tool_call_chunk(name="tool1", args="", id="1", index=0), + create_tool_call_chunk(name="tool1", args="a", id=None, index=1), + ], + id=merge_result.id, # Use the same ID as the merge result + ) + + ai_msg_chunk = AIMessageChunkV1([]) + tool_calls_msg_chunk = AIMessageChunkV1( + [], + tool_call_chunks=[ + create_tool_call_chunk(name="tool1", args="a", id=None, index=1) + ], + ) + # These assertions test that adding empty chunks preserves the non-empty chunk + result1 = ai_msg_chunk + tool_calls_msg_chunk + assert result1.tool_call_chunks == tool_calls_msg_chunk.tool_call_chunks + assert result1.content == tool_calls_msg_chunk.content + + result2 = tool_calls_msg_chunk + ai_msg_chunk + assert result2.tool_call_chunks == tool_calls_msg_chunk.tool_call_chunks + assert result2.content == tool_calls_msg_chunk.content + + ai_msg_chunk = AIMessageChunkV1( + [], + tool_call_chunks=[ + create_tool_call_chunk(name="tool1", args="", id="1", index=0) + ], + ) + assert ai_msg_chunk.tool_calls == [create_tool_call(name="tool1", args={}, id="1")] + + # Test token usage + left = AIMessageChunkV1( + [], + usage_metadata={"input_tokens": 1, "output_tokens": 2, "total_tokens": 3}, + ) + right = AIMessageChunkV1( + [], + usage_metadata={"input_tokens": 4, "output_tokens": 5, "total_tokens": 9}, + ) + usage_result = left + right + expected_usage = AIMessageChunkV1( + content=[], + usage_metadata={"input_tokens": 5, "output_tokens": 7, "total_tokens": 12}, + id=usage_result.id, # Use the same ID as the result + ) + assert usage_result == expected_usage + + # Test adding empty chunks preserves the original + left_result = AIMessageChunkV1(content=[]) + left + assert left_result.usage_metadata == left.usage_metadata + assert left_result.content == left.content + + right_result = right + AIMessageChunkV1(content=[]) + assert right_result.usage_metadata == right.usage_metadata + assert right_result.content == right.content + + # Test ID order of precedence + # Note: AIMessageChunkV1 always generates an ID if none provided + auto_id = AIMessageChunkV1(content=[]) # Gets auto-generated lc_* ID + default_id = AIMessageChunkV1( + content=[], id="lc_run--abc123" + ) # LangChain-assigned run ID + meaningful_id = AIMessageChunkV1( + content=[], id="msg_def456" + ) # provider-assigned ID + + # Provider-assigned IDs always win over LangChain-generated IDs + assert (auto_id + meaningful_id).id == "msg_def456" # provider-assigned wins + assert (meaningful_id + auto_id).id == "msg_def456" # provider-assigned wins + + assert ( + default_id + meaningful_id + ).id == "msg_def456" # meaningful_id is provider-assigned + assert ( + meaningful_id + default_id + ).id == "msg_def456" # meaningful_id is provider-assigned + + # Between auto-generated and lc_run--* IDs, run IDs win + assert (auto_id + default_id).id == default_id.id + assert (default_id + auto_id).id == default_id.id + + def test_chat_message_chunks() -> None: assert ChatMessageChunk(role="User", content="I am", id="ai4") + ChatMessageChunk( role="User", content=" indeed." 
@@ -207,7 +364,7 @@ def test_chat_message_chunks() -> None: ): ChatMessageChunk(role="User", content="I am") + ChatMessageChunk( role="Assistant", content=" indeed." - ) + ) # type: ignore[reportUnusedExpression, unused-ignore] assert ChatMessageChunk(role="User", content="I am") + AIMessageChunk( content=" indeed." @@ -316,7 +473,7 @@ def test_function_message_chunks() -> None: ): FunctionMessageChunk(name="hello", content="I am") + FunctionMessageChunk( name="bye", content=" indeed." - ) + ) # type: ignore[reportUnusedExpression, unused-ignore] def test_ai_message_chunks() -> None: @@ -332,7 +489,7 @@ def test_ai_message_chunks() -> None: ): AIMessageChunk(example=True, content="I am") + AIMessageChunk( example=False, content=" indeed." - ) + ) # type: ignore[reportUnusedExpression, unused-ignore] class TestGetBufferString(unittest.TestCase): @@ -1116,23 +1273,20 @@ def test_is_data_content_block() -> None: assert is_data_content_block( { "type": "image", - "source_type": "url", "url": "https://...", } ) assert is_data_content_block( { "type": "image", - "source_type": "base64", - "data": "", + "base64": "", "mime_type": "image/jpeg", } ) assert is_data_content_block( { "type": "image", - "source_type": "base64", - "data": "", + "base64": "", "mime_type": "image/jpeg", "cache_control": {"type": "ephemeral"}, } @@ -1140,13 +1294,17 @@ def test_is_data_content_block() -> None: assert is_data_content_block( { "type": "image", - "source_type": "base64", - "data": "", + "base64": "", "mime_type": "image/jpeg", "metadata": {"cache_control": {"type": "ephemeral"}}, } ) - + assert is_data_content_block( + { + "type": "image", + "source_type": "base64", # backward compatibility + } + ) assert not is_data_content_block( { "type": "text", @@ -1162,43 +1320,124 @@ def test_is_data_content_block() -> None: assert not is_data_content_block( { "type": "image", - "source_type": "base64", + "source": "", } ) - assert not is_data_content_block( + + +def test_convert_to_openai_image_block() -> None: + for input_block in [ { "type": "image", - "source": "", + "url": "https://...", + "cache_control": {"type": "ephemeral"}, + }, + { + "type": "image", + "source_type": "url", + "url": "https://...", + "cache_control": {"type": "ephemeral"}, + }, + ]: + expected = { + "type": "image_url", + "image_url": {"url": "https://..."}, } - ) + result = convert_to_openai_image_block(input_block) + assert result == expected + for input_block in [ + { + "type": "image", + "base64": "", + "mime_type": "image/jpeg", + "cache_control": {"type": "ephemeral"}, + }, + { + "type": "image", + "source_type": "base64", + "data": "", + "mime_type": "image/jpeg", + "cache_control": {"type": "ephemeral"}, + }, + ]: + expected = { + "type": "image_url", + "image_url": { + "url": "data:image/jpeg;base64,", + }, + } + result = convert_to_openai_image_block(input_block) + assert result == expected -def test_convert_to_openai_image_block() -> None: - input_block = { - "type": "image", - "source_type": "url", - "url": "https://...", - "cache_control": {"type": "ephemeral"}, - } - expected = { - "type": "image_url", - "image_url": {"url": "https://..."}, - } - result = convert_to_openai_image_block(input_block) - assert result == expected - input_block = { - "type": "image", - "source_type": "base64", - "data": "", - "mime_type": "image/jpeg", - "cache_control": {"type": "ephemeral"}, +def test_known_block_types() -> None: + expected = { + bt + for bt in get_args(ContentBlock) + for bt in get_args(bt.__annotations__["type"]) } + # Normalize 
any Literal[...] types in block types to their string values. + # This ensures all entries are plain strings, not Literal objects. expected = { - "type": "image_url", - "image_url": { - "url": "data:image/jpeg;base64,", - }, + t + if isinstance(t, str) + else t.__args__[0] + if hasattr(t, "__args__") and len(t.__args__) == 1 + else t + for t in expected } - result = convert_to_openai_image_block(input_block) - assert result == expected + assert expected == KNOWN_BLOCK_TYPES + + +def test_v1_text_accessor() -> None: + """Test that v1 message.text property and .text() method return the same value.""" + # Test HumanMessage + human_msg = HumanMessageV1(content="Hello world") + assert human_msg.text == "Hello world" + assert human_msg.text() == "Hello world" # type: ignore[operator] + assert str(human_msg.text) == human_msg.text() # type: ignore[operator] + + # Test SystemMessage + system_msg = SystemMessageV1(content="You are a helpful assistant") + assert system_msg.text == "You are a helpful assistant" + assert system_msg.text() == "You are a helpful assistant" # type: ignore[operator] + assert str(system_msg.text) == system_msg.text() # type: ignore[operator] + + # Test AIMessage + ai_msg = AIMessageV1(content="I can help you with that") + assert ai_msg.text == "I can help you with that" + assert ai_msg.text() == "I can help you with that" # type: ignore[operator] + assert str(ai_msg.text) == ai_msg.text() # type: ignore[operator] + + # Test ToolMessage + tool_msg = ToolMessageV1(content="Task completed", tool_call_id="tool_1") + assert tool_msg.text == "Task completed" + assert tool_msg.text() == "Task completed" # type: ignore[operator] + assert str(tool_msg.text) == tool_msg.text() # type: ignore[operator] + + # Test with complex content (list of content blocks) + complex_msg = HumanMessageV1( + content=[{"type": "text", "text": "Hello "}, {"type": "text", "text": "world"}] + ) + assert complex_msg.text == "Hello world" + assert complex_msg.text() == "Hello world" # type: ignore[operator] + assert str(complex_msg.text) == complex_msg.text() # type: ignore[operator] + + # Test with mixed content (text and non-text blocks) + mixed_msg = AIMessageV1( + content=[ + {"type": "text", "text": "The answer is "}, + {"type": "tool_call", "name": "calculate", "args": {"x": 2}, "id": "1"}, + {"type": "text", "text": "42"}, + ] + ) + assert mixed_msg.text == "The answer is 42" + assert mixed_msg.text() == "The answer is 42" # type: ignore[operator] + assert str(mixed_msg.text) == mixed_msg.text() # type: ignore[operator] + + # Test empty content + empty_msg = HumanMessageV1(content=[]) + assert empty_msg.text == "" + assert empty_msg.text() == "" # type: ignore[operator] + assert str(empty_msg.text) == empty_msg.text() # type: ignore[operator] From 54a3c5f85cc17a32b4052d818d0eba23dd03b92f Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Mon, 11 Aug 2025 14:53:12 -0400 Subject: [PATCH 02/73] x --- .../language_models/chat_models.py | 14 ++ libs/core/tests/unit_tests/test_messages.py | 207 ------------------ 2 files changed, 14 insertions(+), 207 deletions(-) diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py index 310f392fd2546..06838e3b07592 100644 --- a/libs/core/langchain_core/language_models/chat_models.py +++ b/libs/core/langchain_core/language_models/chat_models.py @@ -41,6 +41,7 @@ BaseMessageChunk, HumanMessage, convert_to_messages, + convert_to_openai_data_block, convert_to_openai_image_block, is_data_content_block, 
message_chunk_to_message, @@ -130,6 +131,19 @@ def _format_for_tracing(messages: list[BaseMessage]) -> list[BaseMessage]: message_to_trace.content[idx] = ( # type: ignore[index] # mypy confused by .model_copy convert_to_openai_image_block(block) ) + elif ( + block.get("type") == "file" + and is_data_content_block(block) + and "base64" in block + ): + if message_to_trace is message: + # Shallow copy + message_to_trace = message.model_copy() + message_to_trace.content = list(message_to_trace.content) + + message_to_trace.content[idx] = convert_to_openai_data_block( # type: ignore[index] + block + ) elif len(block) == 1 and "type" not in block: # Tracing assumes all content blocks have a "type" key. Here # we add this key if it is missing, and there's an obvious diff --git a/libs/core/tests/unit_tests/test_messages.py b/libs/core/tests/unit_tests/test_messages.py index 89c77414dc0b8..900e42691dede 100644 --- a/libs/core/tests/unit_tests/test_messages.py +++ b/libs/core/tests/unit_tests/test_messages.py @@ -36,11 +36,6 @@ from langchain_core.messages.tool import tool_call as create_tool_call from langchain_core.messages.tool import tool_call_chunk as create_tool_call_chunk from langchain_core.utils._merge import merge_lists -from langchain_core.v1.messages import AIMessage as AIMessageV1 -from langchain_core.v1.messages import AIMessageChunk as AIMessageChunkV1 -from langchain_core.v1.messages import HumanMessage as HumanMessageV1 -from langchain_core.v1.messages import SystemMessage as SystemMessageV1 -from langchain_core.v1.messages import ToolMessage as ToolMessageV1 def test_message_init() -> None: @@ -203,155 +198,6 @@ def test_message_chunks() -> None: assert (meaningful_id + default_id).id == "msg_def456" -def test_message_chunks_v1() -> None: - left = AIMessageChunkV1("foo ", id="abc") - right = AIMessageChunkV1("bar") - expected = AIMessageChunkV1("foo bar", id="abc") - assert left + right == expected - - # Test tool calls - one = AIMessageChunkV1( - [], - tool_call_chunks=[ - create_tool_call_chunk(name="tool1", args="", id="1", index=0) - ], - ) - two = AIMessageChunkV1( - [], - tool_call_chunks=[ - create_tool_call_chunk(name=None, args='{"arg1": "val', id=None, index=0) - ], - ) - three = AIMessageChunkV1( - [], - tool_call_chunks=[ - create_tool_call_chunk(name=None, args='ue}"', id=None, index=0) - ], - ) - result = one + two + three - expected = AIMessageChunkV1( - [], - tool_call_chunks=[ - create_tool_call_chunk( - name="tool1", args='{"arg1": "value}"', id="1", index=0 - ) - ], - id=result.id, # Use the same ID as the result - ) - assert result == expected - - converted_message = result.to_message() - assert converted_message == AIMessageV1( - content=[ - { - "name": "tool1", - "args": {"arg1": "value}"}, - "id": "1", - "type": "tool_call", - } - ], - id=converted_message.id, # Use the same ID as the converted message - ) - - chunk1 = AIMessageChunkV1( - [], - tool_call_chunks=[ - create_tool_call_chunk(name="tool1", args="", id="1", index=0) - ], - ) - chunk2 = AIMessageChunkV1( - [], - tool_call_chunks=[ - create_tool_call_chunk(name="tool1", args="a", id=None, index=1) - ], - ) - # Don't merge if `index` field does not match. 
- merge_result = chunk1 + chunk2 - assert merge_result == AIMessageChunkV1( - [], - tool_call_chunks=[ - create_tool_call_chunk(name="tool1", args="", id="1", index=0), - create_tool_call_chunk(name="tool1", args="a", id=None, index=1), - ], - id=merge_result.id, # Use the same ID as the merge result - ) - - ai_msg_chunk = AIMessageChunkV1([]) - tool_calls_msg_chunk = AIMessageChunkV1( - [], - tool_call_chunks=[ - create_tool_call_chunk(name="tool1", args="a", id=None, index=1) - ], - ) - # These assertions test that adding empty chunks preserves the non-empty chunk - result1 = ai_msg_chunk + tool_calls_msg_chunk - assert result1.tool_call_chunks == tool_calls_msg_chunk.tool_call_chunks - assert result1.content == tool_calls_msg_chunk.content - - result2 = tool_calls_msg_chunk + ai_msg_chunk - assert result2.tool_call_chunks == tool_calls_msg_chunk.tool_call_chunks - assert result2.content == tool_calls_msg_chunk.content - - ai_msg_chunk = AIMessageChunkV1( - [], - tool_call_chunks=[ - create_tool_call_chunk(name="tool1", args="", id="1", index=0) - ], - ) - assert ai_msg_chunk.tool_calls == [create_tool_call(name="tool1", args={}, id="1")] - - # Test token usage - left = AIMessageChunkV1( - [], - usage_metadata={"input_tokens": 1, "output_tokens": 2, "total_tokens": 3}, - ) - right = AIMessageChunkV1( - [], - usage_metadata={"input_tokens": 4, "output_tokens": 5, "total_tokens": 9}, - ) - usage_result = left + right - expected_usage = AIMessageChunkV1( - content=[], - usage_metadata={"input_tokens": 5, "output_tokens": 7, "total_tokens": 12}, - id=usage_result.id, # Use the same ID as the result - ) - assert usage_result == expected_usage - - # Test adding empty chunks preserves the original - left_result = AIMessageChunkV1(content=[]) + left - assert left_result.usage_metadata == left.usage_metadata - assert left_result.content == left.content - - right_result = right + AIMessageChunkV1(content=[]) - assert right_result.usage_metadata == right.usage_metadata - assert right_result.content == right.content - - # Test ID order of precedence - # Note: AIMessageChunkV1 always generates an ID if none provided - auto_id = AIMessageChunkV1(content=[]) # Gets auto-generated lc_* ID - default_id = AIMessageChunkV1( - content=[], id="lc_run--abc123" - ) # LangChain-assigned run ID - meaningful_id = AIMessageChunkV1( - content=[], id="msg_def456" - ) # provider-assigned ID - - # Provider-assigned IDs always win over LangChain-generated IDs - assert (auto_id + meaningful_id).id == "msg_def456" # provider-assigned wins - assert (meaningful_id + auto_id).id == "msg_def456" # provider-assigned wins - - assert ( - default_id + meaningful_id - ).id == "msg_def456" # meaningful_id is provider-assigned - assert ( - meaningful_id + default_id - ).id == "msg_def456" # meaningful_id is provider-assigned - - # Between auto-generated and lc_run--* IDs, run IDs win - assert (auto_id + default_id).id == default_id.id - assert (default_id + auto_id).id == default_id.id - - def test_chat_message_chunks() -> None: assert ChatMessageChunk(role="User", content="I am", id="ai4") + ChatMessageChunk( role="User", content=" indeed." 
@@ -1388,56 +1234,3 @@ def test_known_block_types() -> None: for t in expected } assert expected == KNOWN_BLOCK_TYPES - - -def test_v1_text_accessor() -> None: - """Test that v1 message.text property and .text() method return the same value.""" - # Test HumanMessage - human_msg = HumanMessageV1(content="Hello world") - assert human_msg.text == "Hello world" - assert human_msg.text() == "Hello world" # type: ignore[operator] - assert str(human_msg.text) == human_msg.text() # type: ignore[operator] - - # Test SystemMessage - system_msg = SystemMessageV1(content="You are a helpful assistant") - assert system_msg.text == "You are a helpful assistant" - assert system_msg.text() == "You are a helpful assistant" # type: ignore[operator] - assert str(system_msg.text) == system_msg.text() # type: ignore[operator] - - # Test AIMessage - ai_msg = AIMessageV1(content="I can help you with that") - assert ai_msg.text == "I can help you with that" - assert ai_msg.text() == "I can help you with that" # type: ignore[operator] - assert str(ai_msg.text) == ai_msg.text() # type: ignore[operator] - - # Test ToolMessage - tool_msg = ToolMessageV1(content="Task completed", tool_call_id="tool_1") - assert tool_msg.text == "Task completed" - assert tool_msg.text() == "Task completed" # type: ignore[operator] - assert str(tool_msg.text) == tool_msg.text() # type: ignore[operator] - - # Test with complex content (list of content blocks) - complex_msg = HumanMessageV1( - content=[{"type": "text", "text": "Hello "}, {"type": "text", "text": "world"}] - ) - assert complex_msg.text == "Hello world" - assert complex_msg.text() == "Hello world" # type: ignore[operator] - assert str(complex_msg.text) == complex_msg.text() # type: ignore[operator] - - # Test with mixed content (text and non-text blocks) - mixed_msg = AIMessageV1( - content=[ - {"type": "text", "text": "The answer is "}, - {"type": "tool_call", "name": "calculate", "args": {"x": 2}, "id": "1"}, - {"type": "text", "text": "42"}, - ] - ) - assert mixed_msg.text == "The answer is 42" - assert mixed_msg.text() == "The answer is 42" # type: ignore[operator] - assert str(mixed_msg.text) == mixed_msg.text() # type: ignore[operator] - - # Test empty content - empty_msg = HumanMessageV1(content=[]) - assert empty_msg.text == "" - assert empty_msg.text() == "" # type: ignore[operator] - assert str(empty_msg.text) == empty_msg.text() # type: ignore[operator] From f8244b9108bddb10aa1d604485e562bee70e03fc Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Mon, 11 Aug 2025 16:33:48 -0400 Subject: [PATCH 03/73] type required on tool_call_chunk; keep messages.tool.ToolCallChunk --- .../langchain_core/messages/content_blocks.py | 2 +- libs/core/langchain_core/messages/tool.py | 35 +++++++++++++++++-- 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/libs/core/langchain_core/messages/content_blocks.py b/libs/core/langchain_core/messages/content_blocks.py index d426339156d39..daf5112507406 100644 --- a/libs/core/langchain_core/messages/content_blocks.py +++ b/libs/core/langchain_core/messages/content_blocks.py @@ -334,7 +334,7 @@ class ToolCallChunk(TypedDict): # TODO: Consider making fields NotRequired[str] in the future. 
- type: NotRequired[Literal["tool_call_chunk"]] + type: Literal["tool_call_chunk"] """Used for serialization.""" id: Optional[str] diff --git a/libs/core/langchain_core/messages/tool.py b/libs/core/langchain_core/messages/tool.py index 181c80443d56f..33755ce0ecb44 100644 --- a/libs/core/langchain_core/messages/tool.py +++ b/libs/core/langchain_core/messages/tool.py @@ -5,12 +5,11 @@ from uuid import UUID from pydantic import Field, model_validator -from typing_extensions import override +from typing_extensions import NotRequired, TypedDict, override from langchain_core.messages.base import BaseMessage, BaseMessageChunk, merge_content from langchain_core.messages.content_blocks import InvalidToolCall as InvalidToolCall from langchain_core.messages.content_blocks import ToolCall as ToolCall -from langchain_core.messages.content_blocks import ToolCallChunk as ToolCallChunk from langchain_core.utils._merge import merge_dicts, merge_obj @@ -196,6 +195,38 @@ def tool_call( return ToolCall(name=name, args=args, id=id, type="tool_call") +class ToolCallChunk(TypedDict): + """A chunk of a tool call (e.g., as part of a stream). + + When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), + all string attributes are concatenated. Chunks are only merged if their + values of `index` are equal and not None. + + Example: + + .. code-block:: python + + left_chunks = [ToolCallChunk(name="foo", args='{"a":', index=0)] + right_chunks = [ToolCallChunk(name=None, args='1}', index=0)] + + ( + AIMessageChunk(content="", tool_call_chunks=left_chunks) + + AIMessageChunk(content="", tool_call_chunks=right_chunks) + ).tool_call_chunks == [ToolCallChunk(name='foo', args='{"a":1}', index=0)] + + """ + + name: Optional[str] + """The name of the tool to be called.""" + args: Optional[str] + """The arguments to the tool call.""" + id: Optional[str] + """An identifier associated with the tool call.""" + index: Optional[int] + """The index of the tool call in a sequence.""" + type: NotRequired[Literal["tool_call_chunk"]] + + def tool_call_chunk( *, name: Optional[str] = None, From 1b9ec25755013d39025e5982d029dae591bcb5b6 Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Mon, 11 Aug 2025 16:52:08 -0400 Subject: [PATCH 04/73] update init on aimessage --- libs/core/langchain_core/messages/ai.py | 65 ++++++++++++++--- libs/core/langchain_core/messages/base.py | 72 ++++++++++++++++--- .../core/tests/unit_tests/messages/test_ai.py | 71 ++++++++++++++++++ .../prompts/__snapshots__/test_chat.ambr | 20 ++---- .../runnables/__snapshots__/test_graph.ambr | 10 +-- libs/core/tests/unit_tests/test_messages.py | 32 +++++++++ 6 files changed, 232 insertions(+), 38 deletions(-) diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index 45eb7d2ba5e8d..9d4a1e01879df 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -3,11 +3,12 @@ import json import logging import operator -from typing import Any, Literal, Optional, Union, cast +from typing import Any, Literal, Optional, Union, cast, overload from pydantic import model_validator from typing_extensions import NotRequired, Self, TypedDict, override +from langchain_core.messages import content_blocks as types from langchain_core.messages.base import BaseMessage, BaseMessageChunk, merge_content from langchain_core.messages.tool import ( InvalidToolCall, @@ -179,16 +180,35 @@ class AIMessage(BaseMessage): type: Literal["ai"] = "ai" """The type of the message (used for deserialization). 
Defaults to "ai".""" + @overload def __init__( - self, content: Union[str, list[Union[str, dict]]], **kwargs: Any - ) -> None: - """Pass in content as positional arg. + self, + content: Union[str, list[Union[str, dict]]], + **kwargs: Any, + ) -> None: ... - Args: - content: The content of the message. - kwargs: Additional arguments to pass to the parent class. - """ - super().__init__(content=content, **kwargs) + @overload + def __init__( + self, + content: Optional[Union[str, list[Union[str, dict]]]] = None, + content_blocks: Optional[list[types.ContentBlock]] = None, + **kwargs: Any, + ) -> None: ... + + def __init__( + self, + content: Optional[Union[str, list[Union[str, dict]]]] = None, + content_blocks: Optional[list[types.ContentBlock]] = None, + **kwargs: Any, + ) -> None: + """Specify content as a positional arg or content_blocks for typing support.""" + if content_blocks is not None: + super().__init__( + content=cast("Union[str, list[Union[str, dict]]]", content_blocks), + **kwargs, + ) + else: + super().__init__(content=content, **kwargs) @property def lc_attributes(self) -> dict: @@ -198,6 +218,33 @@ def lc_attributes(self) -> dict: "invalid_tool_calls": self.invalid_tool_calls, } + @property + def content_blocks(self) -> list[types.ContentBlock]: + """Return content blocks of the message.""" + blocks = super().content_blocks + + # Add from tool_calls if missing from content + content_tool_call_ids = { + block.get("id") + for block in self.content + if isinstance(block, dict) and block.get("type") == "tool_call" + } + for tool_call in self.tool_calls: + if (id_ := tool_call.get("id")) and id_ not in content_tool_call_ids: + tool_call_block: types.ToolCall = { + "type": "tool_call", + "id": id_, + "name": tool_call["name"], + "args": tool_call["args"], + } + if "index" in tool_call: + tool_call_block["index"] = tool_call["index"] + if "extras" in tool_call: + tool_call_block["extras"] = tool_call["extras"] + blocks.append(tool_call_block) + + return blocks + # TODO: remove this logic if possible, reducing breaking nature of changes @model_validator(mode="before") @classmethod diff --git a/libs/core/langchain_core/messages/base.py b/libs/core/langchain_core/messages/base.py index ba976286b75d8..13b12f764d19e 100644 --- a/libs/core/langchain_core/messages/base.py +++ b/libs/core/langchain_core/messages/base.py @@ -2,11 +2,12 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Optional, Union, cast +from typing import TYPE_CHECKING, Any, Optional, Union, cast, overload from pydantic import ConfigDict, Field from langchain_core.load.serializable import Serializable +from langchain_core.messages import content_blocks as types from langchain_core.utils import get_bolded_text from langchain_core.utils._merge import merge_dicts, merge_lists from langchain_core.utils.interactive_env import is_interactive_env @@ -61,15 +62,32 @@ class BaseMessage(Serializable): extra="allow", ) + @overload def __init__( - self, content: Union[str, list[Union[str, dict]]], **kwargs: Any - ) -> None: - """Pass in content as positional arg. + self, + content: Union[str, list[Union[str, dict]]], + **kwargs: Any, + ) -> None: ... - Args: - content: The string contents of the message. - """ - super().__init__(content=content, **kwargs) + @overload + def __init__( + self, + content: Optional[Union[str, list[Union[str, dict]]]] = None, + content_blocks: Optional[list[types.ContentBlock]] = None, + **kwargs: Any, + ) -> None: ... 
+ + def __init__( + self, + content: Optional[Union[str, list[Union[str, dict]]]] = None, + content_blocks: Optional[list[types.ContentBlock]] = None, + **kwargs: Any, + ) -> None: + """Specify content as a positional arg or content_blocks for typing support.""" + if content_blocks is not None: + super().__init__(content=content_blocks, **kwargs) + else: + super().__init__(content=content, **kwargs) @classmethod def is_lc_serializable(cls) -> bool: @@ -88,6 +106,44 @@ def get_lc_namespace(cls) -> list[str]: """ return ["langchain", "schema", "messages"] + @property + def content_blocks(self) -> list[types.ContentBlock]: + """Return the content as a list of standard ContentBlocks. + + To use this property, the corresponding chat model must support + ``message_version="v1"`` or higher: + + .. code-block:: python + + from langchain.chat_models import init_chat_model + llm = init_chat_model("...", message_version="v1") + + otherwise, does best-effort parsing to standard types. + """ + blocks: list[types.ContentBlock] = [] + content = ( + [self.content] + if isinstance(self.content, str) and self.content + else self.content + ) + for item in content: + if isinstance(item, str): + blocks.append({"type": "text", "text": item}) + elif isinstance(item, dict): + item_type = item.get("type") + if item_type not in types.KNOWN_BLOCK_TYPES: + msg = ( + f"Non-standard content block type '{item_type}'. Ensure " + "the model supports `output_version='v1'` or higher and " + "that this attribute is set on initialization." + ) + raise ValueError(msg) + blocks.append(cast("types.ContentBlock", item)) + else: + pass + + return blocks + def text(self) -> str: """Get the text content of the message. diff --git a/libs/core/tests/unit_tests/messages/test_ai.py b/libs/core/tests/unit_tests/messages/test_ai.py index d36d034712817..81981725c5005 100644 --- a/libs/core/tests/unit_tests/messages/test_ai.py +++ b/libs/core/tests/unit_tests/messages/test_ai.py @@ -1,5 +1,6 @@ from langchain_core.load import dumpd, load from langchain_core.messages import AIMessage, AIMessageChunk +from langchain_core.messages import content_blocks as types from langchain_core.messages.ai import ( InputTokenDetails, OutputTokenDetails, @@ -196,3 +197,73 @@ def test_add_ai_message_chunks_usage() -> None: output_token_details=OutputTokenDetails(audio=1, reasoning=2), ), ) + + +def test_content_blocks() -> None: + message = AIMessage( + "", + tool_calls=[ + {"type": "tool_call", "name": "foo", "args": {"a": "b"}, "id": "abc_123"} + ], + ) + assert len(message.content_blocks) == 1 + assert message.content_blocks[0]["type"] == "tool_call" + assert message.content_blocks == [ + {"type": "tool_call", "id": "abc_123", "name": "foo", "args": {"a": "b"}} + ] + assert message.content == "" + + message = AIMessage( + "foo", + tool_calls=[ + {"type": "tool_call", "name": "foo", "args": {"a": "b"}, "id": "abc_123"} + ], + ) + assert len(message.content_blocks) == 2 + assert message.content_blocks[0]["type"] == "text" + assert message.content_blocks[1]["type"] == "tool_call" + assert message.content_blocks == [ + {"type": "text", "text": "foo"}, + {"type": "tool_call", "id": "abc_123", "name": "foo", "args": {"a": "b"}}, + ] + assert message.content == "foo" + + # With standard blocks + standard_content: list[types.ContentBlock] = [ + {"type": "reasoning", "reasoning": "foo"}, + {"type": "text", "text": "bar"}, + { + "type": "text", + "text": "baz", + "annotations": [{"type": "citation", "url": "http://example.com"}], + }, + { + "type": "image", + "url": 
"http://example.com/image.png", + "extras": {"foo": "bar"}, + }, + { + "type": "non_standard", + "value": {"custom_key": "custom_value", "another_key": 123}, + }, + { + "type": "tool_call", + "name": "foo", + "args": {"a": "b"}, + "id": "abc_123", + }, + ] + missing_tool_call = { + "type": "tool_call", + "name": "bar", + "args": {"c": "d"}, + "id": "abc_234", + } + message = AIMessage( + content_blocks=standard_content, + tool_calls=[ + {"type": "tool_call", "name": "foo", "args": {"a": "b"}, "id": "abc_123"}, + missing_tool_call, + ], + ) + assert message.content_blocks == [*standard_content, missing_tool_call] diff --git a/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr b/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr index f45b2f7dae9b6..08a1c528cfb6f 100644 --- a/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr +++ b/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr @@ -1062,9 +1062,9 @@ 'description': ''' A chunk of a tool call (e.g., as part of a stream). - When merging ``ToolCallChunks`` (e.g., via ``AIMessageChunk.__add__``), + When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), all string attributes are concatenated. Chunks are only merged if their - values of ``index`` are equal and not ``None``. + values of `index` are equal and not None. Example: @@ -1090,10 +1090,6 @@ ]), 'title': 'Args', }), - 'extras': dict({ - 'title': 'Extras', - 'type': 'object', - }), 'id': dict({ 'anyOf': list([ dict({ @@ -1134,9 +1130,9 @@ }), }), 'required': list([ - 'id', 'name', 'args', + 'id', 'index', ]), 'title': 'ToolCallChunk', @@ -2523,9 +2519,9 @@ 'description': ''' A chunk of a tool call (e.g., as part of a stream). - When merging ``ToolCallChunks`` (e.g., via ``AIMessageChunk.__add__``), + When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), all string attributes are concatenated. Chunks are only merged if their - values of ``index`` are equal and not ``None``. + values of `index` are equal and not None. Example: @@ -2551,10 +2547,6 @@ ]), 'title': 'Args', }), - 'extras': dict({ - 'title': 'Extras', - 'type': 'object', - }), 'id': dict({ 'anyOf': list([ dict({ @@ -2595,9 +2587,9 @@ }), }), 'required': list([ - 'id', 'name', 'args', + 'id', 'index', ]), 'title': 'ToolCallChunk', diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr index 1a5bcc93dad87..d3a746eaf7966 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr @@ -1465,9 +1465,9 @@ 'description': ''' A chunk of a tool call (e.g., as part of a stream). - When merging ``ToolCallChunks`` (e.g., via ``AIMessageChunk.__add__``), + When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), all string attributes are concatenated. Chunks are only merged if their - values of ``index`` are equal and not ``None``. + values of `index` are equal and not None. 
Example: @@ -1493,10 +1493,6 @@ ]), 'title': 'Args', }), - 'extras': dict({ - 'title': 'Extras', - 'type': 'object', - }), 'id': dict({ 'anyOf': list([ dict({ @@ -1537,9 +1533,9 @@ }), }), 'required': list([ - 'id', 'name', 'args', + 'id', 'index', ]), 'title': 'ToolCallChunk', diff --git a/libs/core/tests/unit_tests/test_messages.py b/libs/core/tests/unit_tests/test_messages.py index 900e42691dede..7157c3341d1dc 100644 --- a/libs/core/tests/unit_tests/test_messages.py +++ b/libs/core/tests/unit_tests/test_messages.py @@ -1234,3 +1234,35 @@ def test_known_block_types() -> None: for t in expected } assert expected == KNOWN_BLOCK_TYPES + + +def test_typed_init() -> None: + # AIMessage + message = AIMessage("Hello") + assert message.content == "Hello" + assert message.content_blocks == [{"type": "text", "text": "Hello"}] + + message = AIMessage(content="Hello") + assert message.content == "Hello" + assert message.content_blocks == [{"type": "text", "text": "Hello"}] + + message = AIMessage(content_blocks=[{"type": "text", "text": "Hello"}]) + assert message.content == [{"type": "text", "text": "Hello"}] + assert message.content_blocks == [{"type": "text", "text": "Hello"}] + + # # HumanMessage + # message = HumanMessage("Hello") + # assert message.content == "Hello" + # assert message.content_blocks == [{"type": "text", "text": "Hello"}] + + # message = HumanMessage(content="Hello") + # assert message.content == "Hello" + # assert message.content_blocks == [{"type": "text", "text": "Hello"}] + + # message = HumanMessage(content_blocks=[{"type": "text", "text": "Hello"}]) + # assert message.content == [{"type": "text", "text": "Hello"}] + # assert message.content_blocks == [{"type": "text", "text": "Hello"}] + + # Test we get type errors for malformed blocks (type checker will complain if + # below type-ignores are unused). + _ = AIMessage(content_blocks=[{"type": "text", "bad": "Hello"}]) # type: ignore[list-item] From 8426db47f151bc8f39493b67a7c789e9ed5afcb3 Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Mon, 11 Aug 2025 18:09:04 -0400 Subject: [PATCH 05/73] update init on HumanMessage, SystemMessage, ToolMessage --- libs/core/langchain_core/messages/human.py | 38 ++++++++++---- libs/core/langchain_core/messages/system.py | 38 ++++++++++---- libs/core/langchain_core/messages/tool.py | 38 ++++++++++---- libs/core/tests/unit_tests/test_messages.py | 58 ++++++++++++--------- libs/core/tests/unit_tests/test_tools.py | 12 ++--- 5 files changed, 126 insertions(+), 58 deletions(-) diff --git a/libs/core/langchain_core/messages/human.py b/libs/core/langchain_core/messages/human.py index 1be4cbfa9d3d9..041db0cdb9726 100644 --- a/libs/core/langchain_core/messages/human.py +++ b/libs/core/langchain_core/messages/human.py @@ -1,7 +1,8 @@ """Human message.""" -from typing import Any, Literal, Union +from typing import Any, Literal, Optional, Union, cast, overload +from langchain_core.messages import content_blocks as types from langchain_core.messages.base import BaseMessage, BaseMessageChunk @@ -41,16 +42,35 @@ class HumanMessage(BaseMessage): type: Literal["human"] = "human" """The type of the message (used for serialization). Defaults to "human".""" + @overload def __init__( - self, content: Union[str, list[Union[str, dict]]], **kwargs: Any - ) -> None: - """Pass in content as positional arg. + self, + content: Union[str, list[Union[str, dict]]], + **kwargs: Any, + ) -> None: ... 
+ + @overload + def __init__( + self, + content: Optional[Union[str, list[Union[str, dict]]]] = None, + content_blocks: Optional[list[types.ContentBlock]] = None, + **kwargs: Any, + ) -> None: ... - Args: - content: The string contents of the message. - kwargs: Additional fields to pass to the message. - """ - super().__init__(content=content, **kwargs) + def __init__( + self, + content: Optional[Union[str, list[Union[str, dict]]]] = None, + content_blocks: Optional[list[types.ContentBlock]] = None, + **kwargs: Any, + ) -> None: + """Specify content as a positional arg or content_blocks for typing support.""" + if content_blocks is not None: + super().__init__( + content=cast("Union[str, list[Union[str, dict]]]", content_blocks), + **kwargs, + ) + else: + super().__init__(content=content, **kwargs) class HumanMessageChunk(HumanMessage, BaseMessageChunk): diff --git a/libs/core/langchain_core/messages/system.py b/libs/core/langchain_core/messages/system.py index d63bd53a0fee0..c8a5bbae5c859 100644 --- a/libs/core/langchain_core/messages/system.py +++ b/libs/core/langchain_core/messages/system.py @@ -1,7 +1,8 @@ """System message.""" -from typing import Any, Literal, Union +from typing import Any, Literal, Optional, Union, cast, overload +from langchain_core.messages import content_blocks as types from langchain_core.messages.base import BaseMessage, BaseMessageChunk @@ -34,16 +35,35 @@ class SystemMessage(BaseMessage): type: Literal["system"] = "system" """The type of the message (used for serialization). Defaults to "system".""" + @overload def __init__( - self, content: Union[str, list[Union[str, dict]]], **kwargs: Any - ) -> None: - """Pass in content as positional arg. + self, + content: Union[str, list[Union[str, dict]]], + **kwargs: Any, + ) -> None: ... + + @overload + def __init__( + self, + content: Optional[Union[str, list[Union[str, dict]]]] = None, + content_blocks: Optional[list[types.ContentBlock]] = None, + **kwargs: Any, + ) -> None: ... - Args: - content: The string contents of the message. - kwargs: Additional fields to pass to the message. 
- """ - super().__init__(content=content, **kwargs) + def __init__( + self, + content: Optional[Union[str, list[Union[str, dict]]]] = None, + content_blocks: Optional[list[types.ContentBlock]] = None, + **kwargs: Any, + ) -> None: + """Specify content as a positional arg or content_blocks for typing support.""" + if content_blocks is not None: + super().__init__( + content=cast("Union[str, list[Union[str, dict]]]", content_blocks), + **kwargs, + ) + else: + super().__init__(content=content, **kwargs) class SystemMessageChunk(SystemMessage, BaseMessageChunk): diff --git a/libs/core/langchain_core/messages/tool.py b/libs/core/langchain_core/messages/tool.py index 33755ce0ecb44..14177181480fc 100644 --- a/libs/core/langchain_core/messages/tool.py +++ b/libs/core/langchain_core/messages/tool.py @@ -1,12 +1,13 @@ """Messages for tools.""" import json -from typing import Any, Literal, Optional, Union +from typing import Any, Literal, Optional, Union, cast, overload from uuid import UUID from pydantic import Field, model_validator from typing_extensions import NotRequired, TypedDict, override +from langchain_core.messages import content_blocks as types from langchain_core.messages.base import BaseMessage, BaseMessageChunk, merge_content from langchain_core.messages.content_blocks import InvalidToolCall as InvalidToolCall from langchain_core.messages.content_blocks import ToolCall as ToolCall @@ -135,16 +136,35 @@ def coerce_args(cls, values: dict) -> dict: values["tool_call_id"] = str(tool_call_id) return values + @overload def __init__( - self, content: Union[str, list[Union[str, dict]]], **kwargs: Any - ) -> None: - """Create a ToolMessage. + self, + content: Union[str, list[Union[str, dict]]], + **kwargs: Any, + ) -> None: ... - Args: - content: The string contents of the message. - **kwargs: Additional fields. - """ - super().__init__(content=content, **kwargs) + @overload + def __init__( + self, + content: Optional[Union[str, list[Union[str, dict]]]] = None, + content_blocks: Optional[list[types.ContentBlock]] = None, + **kwargs: Any, + ) -> None: ... 
+ + def __init__( + self, + content: Optional[Union[str, list[Union[str, dict]]]] = None, + content_blocks: Optional[list[types.ContentBlock]] = None, + **kwargs: Any, + ) -> None: + """Specify content as a positional arg or content_blocks for typing support.""" + if content_blocks is not None: + super().__init__( + content=cast("Union[str, list[Union[str, dict]]]", content_blocks), + **kwargs, + ) + else: + super().__init__(content=content, **kwargs) class ToolMessageChunk(ToolMessage, BaseMessageChunk): diff --git a/libs/core/tests/unit_tests/test_messages.py b/libs/core/tests/unit_tests/test_messages.py index 7157c3341d1dc..d6857aef32de2 100644 --- a/libs/core/tests/unit_tests/test_messages.py +++ b/libs/core/tests/unit_tests/test_messages.py @@ -1041,12 +1041,12 @@ def test_tool_message_content() -> None: ToolMessage(["foo"], tool_call_id="1") ToolMessage([{"foo": "bar"}], tool_call_id="1") - assert ToolMessage(("a", "b", "c"), tool_call_id="1").content == ["a", "b", "c"] # type: ignore[arg-type] - assert ToolMessage(5, tool_call_id="1").content == "5" # type: ignore[arg-type] - assert ToolMessage(5.1, tool_call_id="1").content == "5.1" # type: ignore[arg-type] - assert ToolMessage({"foo": "bar"}, tool_call_id="1").content == "{'foo': 'bar'}" # type: ignore[arg-type] + assert ToolMessage(("a", "b", "c"), tool_call_id="1").content == ["a", "b", "c"] # type: ignore[call-overload] + assert ToolMessage(5, tool_call_id="1").content == "5" # type: ignore[call-overload] + assert ToolMessage(5.1, tool_call_id="1").content == "5.1" # type: ignore[call-overload] + assert ToolMessage({"foo": "bar"}, tool_call_id="1").content == "{'foo': 'bar'}" # type: ignore[call-overload] assert ( - ToolMessage(Document("foo"), tool_call_id="1").content == "page_content='foo'" # type: ignore[arg-type] + ToolMessage(Document("foo"), tool_call_id="1").content == "page_content='foo'" # type: ignore[call-overload] ) @@ -1237,32 +1237,40 @@ def test_known_block_types() -> None: def test_typed_init() -> None: - # AIMessage - message = AIMessage("Hello") - assert message.content == "Hello" - assert message.content_blocks == [{"type": "text", "text": "Hello"}] + ai_message = AIMessage(content_blocks=[{"type": "text", "text": "Hello"}]) + assert ai_message.content == [{"type": "text", "text": "Hello"}] + assert ai_message.content_blocks == ai_message.content - message = AIMessage(content="Hello") - assert message.content == "Hello" - assert message.content_blocks == [{"type": "text", "text": "Hello"}] + human_message = HumanMessage(content_blocks=[{"type": "text", "text": "Hello"}]) + assert human_message.content == [{"type": "text", "text": "Hello"}] + assert human_message.content_blocks == human_message.content - message = AIMessage(content_blocks=[{"type": "text", "text": "Hello"}]) - assert message.content == [{"type": "text", "text": "Hello"}] - assert message.content_blocks == [{"type": "text", "text": "Hello"}] + system_message = SystemMessage(content_blocks=[{"type": "text", "text": "Hello"}]) + assert system_message.content == [{"type": "text", "text": "Hello"}] + assert system_message.content_blocks == system_message.content - # # HumanMessage - # message = HumanMessage("Hello") - # assert message.content == "Hello" - # assert message.content_blocks == [{"type": "text", "text": "Hello"}] + tool_message = ToolMessage( + content_blocks=[{"type": "text", "text": "Hello"}], + tool_call_id="abc123", + ) + assert tool_message.content == [{"type": "text", "text": "Hello"}] + assert tool_message.content_blocks == 
tool_message.content - # message = HumanMessage(content="Hello") - # assert message.content == "Hello" - # assert message.content_blocks == [{"type": "text", "text": "Hello"}] + for message_class in [AIMessage, HumanMessage, SystemMessage]: + message = message_class("Hello") + assert message.content == "Hello" + assert message.content_blocks == [{"type": "text", "text": "Hello"}] - # message = HumanMessage(content_blocks=[{"type": "text", "text": "Hello"}]) - # assert message.content == [{"type": "text", "text": "Hello"}] - # assert message.content_blocks == [{"type": "text", "text": "Hello"}] + message = message_class(content="Hello") + assert message.content == "Hello" + assert message.content_blocks == [{"type": "text", "text": "Hello"}] # Test we get type errors for malformed blocks (type checker will complain if # below type-ignores are unused). _ = AIMessage(content_blocks=[{"type": "text", "bad": "Hello"}]) # type: ignore[list-item] + _ = HumanMessage(content_blocks=[{"type": "text", "bad": "Hello"}]) # type: ignore[list-item] + _ = SystemMessage(content_blocks=[{"type": "text", "bad": "Hello"}]) # type: ignore[list-item] + _ = ToolMessage( + content_blocks=[{"type": "text", "bad": "Hello"}], # type: ignore[list-item] + tool_call_id="abc123", + ) diff --git a/libs/core/tests/unit_tests/test_tools.py b/libs/core/tests/unit_tests/test_tools.py index 72c6a5a387cfb..af0cdec45ce21 100644 --- a/libs/core/tests/unit_tests/test_tools.py +++ b/libs/core/tests/unit_tests/test_tools.py @@ -2281,7 +2281,7 @@ def test_tool_injected_tool_call_id() -> None: @tool def foo(x: int, tool_call_id: Annotated[str, InjectedToolCallId]) -> ToolMessage: """Foo.""" - return ToolMessage(x, tool_call_id=tool_call_id) # type: ignore[arg-type] + return ToolMessage(x, tool_call_id=tool_call_id) # type: ignore[call-overload] assert foo.invoke( { @@ -2290,7 +2290,7 @@ def foo(x: int, tool_call_id: Annotated[str, InjectedToolCallId]) -> ToolMessage "name": "foo", "id": "bar", } - ) == ToolMessage(0, tool_call_id="bar") # type: ignore[arg-type] + ) == ToolMessage(0, tool_call_id="bar") # type: ignore[call-overload] with pytest.raises( ValueError, @@ -2302,7 +2302,7 @@ def foo(x: int, tool_call_id: Annotated[str, InjectedToolCallId]) -> ToolMessage @tool def foo2(x: int, tool_call_id: Annotated[str, InjectedToolCallId()]) -> ToolMessage: """Foo.""" - return ToolMessage(x, tool_call_id=tool_call_id) # type: ignore[arg-type] + return ToolMessage(x, tool_call_id=tool_call_id) # type: ignore[call-overload] assert foo2.invoke( { @@ -2311,14 +2311,14 @@ def foo2(x: int, tool_call_id: Annotated[str, InjectedToolCallId()]) -> ToolMess "name": "foo", "id": "bar", } - ) == ToolMessage(0, tool_call_id="bar") # type: ignore[arg-type] + ) == ToolMessage(0, tool_call_id="bar") # type: ignore[call-overload] def test_tool_uninjected_tool_call_id() -> None: @tool def foo(x: int, tool_call_id: str) -> ToolMessage: """Foo.""" - return ToolMessage(x, tool_call_id=tool_call_id) # type: ignore[arg-type] + return ToolMessage(x, tool_call_id=tool_call_id) # type: ignore[call-overload] with pytest.raises(ValueError, match="1 validation error for foo"): foo.invoke({"type": "tool_call", "args": {"x": 0}, "name": "foo", "id": "bar"}) @@ -2330,7 +2330,7 @@ def foo(x: int, tool_call_id: str) -> ToolMessage: "name": "foo", "id": "bar", } - ) == ToolMessage(0, tool_call_id="zap") # type: ignore[arg-type] + ) == ToolMessage(0, tool_call_id="zap") # type: ignore[call-overload] def test_tool_return_output_mixin() -> None: From 
0ddab9ff20faa3bee52892b9b2ce87c9316ef2e4 Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Tue, 12 Aug 2025 10:59:50 -0400 Subject: [PATCH 06/73] start on duplicate content --- libs/core/langchain_core/messages/ai.py | 70 ++++++++++++------- libs/core/langchain_core/messages/base.py | 69 ++++++++++-------- libs/core/langchain_core/messages/human.py | 7 +- libs/core/langchain_core/messages/system.py | 7 +- libs/core/langchain_core/messages/tool.py | 7 +- .../language_models/chat_models/test_base.py | 44 ++++-------- libs/core/tests/unit_tests/test_messages.py | 19 +++-- 7 files changed, 129 insertions(+), 94 deletions(-) diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index 9d4a1e01879df..6983143b6fa6b 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -202,11 +202,16 @@ def __init__( **kwargs: Any, ) -> None: """Specify content as a positional arg or content_blocks for typing support.""" - if content_blocks is not None: + if content is not None and content_blocks is None: + super().__init__(content=content, **kwargs) + elif content is None and content_blocks is not None: super().__init__( content=cast("Union[str, list[Union[str, dict]]]", content_blocks), + content_blocks=content_blocks, **kwargs, ) + elif content is not None and content_blocks is not None: + super().__init__(content=content, content_blocks=content_blocks, **kwargs) else: super().__init__(content=content, **kwargs) @@ -218,32 +223,45 @@ def lc_attributes(self) -> dict: "invalid_tool_calls": self.invalid_tool_calls, } - @property - def content_blocks(self) -> list[types.ContentBlock]: - """Return content blocks of the message.""" - blocks = super().content_blocks - - # Add from tool_calls if missing from content - content_tool_call_ids = { - block.get("id") - for block in self.content - if isinstance(block, dict) and block.get("type") == "tool_call" - } - for tool_call in self.tool_calls: - if (id_ := tool_call.get("id")) and id_ not in content_tool_call_ids: - tool_call_block: types.ToolCall = { - "type": "tool_call", - "id": id_, - "name": tool_call["name"], - "args": tool_call["args"], - } - if "index" in tool_call: - tool_call_block["index"] = tool_call["index"] - if "extras" in tool_call: - tool_call_block["extras"] = tool_call["extras"] - blocks.append(tool_call_block) + @model_validator(mode="after") + def _init_content_blocks(self) -> Self: + """Assign the content as a list of standard ContentBlocks. + + To use this property, the corresponding chat model must support + ``message_version="v1"`` or higher: - return blocks + .. code-block:: python + + from langchain.chat_models import init_chat_model + llm = init_chat_model("...", message_version="v1") + + otherwise, does best-effort parsing to standard types. 
+ """ + if not self.content_blocks: + self.content_blocks = self._init_text_content(self.content) + + if self.tool_calls or self.invalid_tool_calls: + # Add from tool_calls if missing from content + content_tool_call_ids = { + block.get("id") + for block in self.content_blocks + if isinstance(block, dict) and block.get("type") == "tool_call" + } + for tool_call in self.tool_calls: + if (id_ := tool_call.get("id")) and id_ not in content_tool_call_ids: + tool_call_block: types.ToolCall = { + "type": "tool_call", + "id": id_, + "name": tool_call["name"], + "args": tool_call["args"], + } + if "index" in tool_call: + tool_call_block["index"] = tool_call["index"] + if "extras" in tool_call: + tool_call_block["extras"] = tool_call["extras"] + self.content_blocks.append(tool_call_block) + + return self # TODO: remove this logic if possible, reducing breaking nature of changes @model_validator(mode="before") diff --git a/libs/core/langchain_core/messages/base.py b/libs/core/langchain_core/messages/base.py index 13b12f764d19e..ff88dd248eb18 100644 --- a/libs/core/langchain_core/messages/base.py +++ b/libs/core/langchain_core/messages/base.py @@ -4,7 +4,8 @@ from typing import TYPE_CHECKING, Any, Optional, Union, cast, overload -from pydantic import ConfigDict, Field +from pydantic import ConfigDict, Field, model_validator +from typing_extensions import Self from langchain_core.load.serializable import Serializable from langchain_core.messages import content_blocks as types @@ -27,6 +28,9 @@ class BaseMessage(Serializable): content: Union[str, list[Union[str, dict]]] """The string contents of the message.""" + content_blocks: list[types.ContentBlock] = Field(default_factory=list) + """The content of the message as a list of standard ContentBlocks.""" + additional_kwargs: dict = Field(default_factory=dict) """Reserved for additional payload data associated with the message. @@ -84,8 +88,14 @@ def __init__( **kwargs: Any, ) -> None: """Specify content as a positional arg or content_blocks for typing support.""" - if content_blocks is not None: - super().__init__(content=content_blocks, **kwargs) + if content is not None and content_blocks is None: + super().__init__(content=content, **kwargs) + elif content is None and content_blocks is not None: + super().__init__( + content=content_blocks, content_blocks=content_blocks, **kwargs + ) + elif content is not None and content_blocks is not None: + super().__init__(content=content, content_blocks=content_blocks, **kwargs) else: super().__init__(content=content, **kwargs) @@ -106,9 +116,30 @@ def get_lc_namespace(cls) -> list[str]: """ return ["langchain", "schema", "messages"] - @property - def content_blocks(self) -> list[types.ContentBlock]: - """Return the content as a list of standard ContentBlocks. 
+ @staticmethod + def _init_text_content( + content: Union[str, list[Union[str, dict]]], + ) -> list[types.ContentBlock]: + """Parse string content into a list of ContentBlocks.""" + blocks: list[types.ContentBlock] = [] + content = [content] if isinstance(content, str) and content else content + for item in content: + if isinstance(item, str): + blocks.append({"type": "text", "text": item}) + elif isinstance(item, dict): + item_type = item.get("type") + if item_type not in types.KNOWN_BLOCK_TYPES: + blocks.append({"type": "non_standard", "value": item}) + else: + blocks.append(cast("types.ContentBlock", item)) + else: + pass + + return blocks + + @model_validator(mode="after") + def _init_content_blocks(self) -> Self: + """Assign the content as a list of standard ContentBlocks. To use this property, the corresponding chat model must support ``message_version="v1"`` or higher: @@ -120,29 +151,11 @@ def content_blocks(self) -> list[types.ContentBlock]: otherwise, does best-effort parsing to standard types. """ - blocks: list[types.ContentBlock] = [] - content = ( - [self.content] - if isinstance(self.content, str) and self.content - else self.content - ) - for item in content: - if isinstance(item, str): - blocks.append({"type": "text", "text": item}) - elif isinstance(item, dict): - item_type = item.get("type") - if item_type not in types.KNOWN_BLOCK_TYPES: - msg = ( - f"Non-standard content block type '{item_type}'. Ensure " - "the model supports `output_version='v1'` or higher and " - "that this attribute is set on initialization." - ) - raise ValueError(msg) - blocks.append(cast("types.ContentBlock", item)) - else: - pass + if not self.content_blocks: + blocks = self._init_text_content(self.content) + self.content_blocks = blocks - return blocks + return self def text(self) -> str: """Get the text content of the message. 
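
As context for the hunks above, a minimal usage sketch of the new ``content_blocks`` keyword (it mirrors the ``test_typed_init`` assertions earlier in this series; treat it as illustrative, not a full spec of the final behaviour):

.. code-block:: python

    from langchain_core.messages import AIMessage, HumanMessage, ToolMessage

    # Plain string content keeps the existing behaviour.
    msg = HumanMessage("Hello")
    assert msg.content == "Hello"
    assert msg.content_blocks == [{"type": "text", "text": "Hello"}]

    # Typed blocks go through the new keyword and are mirrored into `content`.
    msg = AIMessage(content_blocks=[{"type": "text", "text": "Hello"}])
    assert msg.content == [{"type": "text", "text": "Hello"}]

    # ToolMessage keeps its required tool_call_id.
    msg = ToolMessage(
        content_blocks=[{"type": "text", "text": "ok"}],
        tool_call_id="call_1",
    )
    assert msg.content == [{"type": "text", "text": "ok"}]
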
diff --git a/libs/core/langchain_core/messages/human.py b/libs/core/langchain_core/messages/human.py index 041db0cdb9726..5383bb964d4fc 100644 --- a/libs/core/langchain_core/messages/human.py +++ b/libs/core/langchain_core/messages/human.py @@ -64,11 +64,16 @@ def __init__( **kwargs: Any, ) -> None: """Specify content as a positional arg or content_blocks for typing support.""" - if content_blocks is not None: + if content is not None and content_blocks is None: + super().__init__(content=content, **kwargs) + elif content is None and content_blocks is not None: super().__init__( content=cast("Union[str, list[Union[str, dict]]]", content_blocks), + content_blocks=content_blocks, **kwargs, ) + elif content is not None and content_blocks is not None: + super().__init__(content=content, content_blocks=content_blocks, **kwargs) else: super().__init__(content=content, **kwargs) diff --git a/libs/core/langchain_core/messages/system.py b/libs/core/langchain_core/messages/system.py index c8a5bbae5c859..976ea55d15685 100644 --- a/libs/core/langchain_core/messages/system.py +++ b/libs/core/langchain_core/messages/system.py @@ -57,11 +57,16 @@ def __init__( **kwargs: Any, ) -> None: """Specify content as a positional arg or content_blocks for typing support.""" - if content_blocks is not None: + if content is not None and content_blocks is None: + super().__init__(content=content, **kwargs) + elif content is None and content_blocks is not None: super().__init__( content=cast("Union[str, list[Union[str, dict]]]", content_blocks), + content_blocks=content_blocks, **kwargs, ) + elif content is not None and content_blocks is not None: + super().__init__(content=content, content_blocks=content_blocks, **kwargs) else: super().__init__(content=content, **kwargs) diff --git a/libs/core/langchain_core/messages/tool.py b/libs/core/langchain_core/messages/tool.py index 14177181480fc..78b4f14c3a953 100644 --- a/libs/core/langchain_core/messages/tool.py +++ b/libs/core/langchain_core/messages/tool.py @@ -158,11 +158,16 @@ def __init__( **kwargs: Any, ) -> None: """Specify content as a positional arg or content_blocks for typing support.""" - if content_blocks is not None: + if content is not None and content_blocks is None: + super().__init__(content=content, **kwargs) + elif content is None and content_blocks is not None: super().__init__( content=cast("Union[str, list[Union[str, dict]]]", content_blocks), + content_blocks=content_blocks, **kwargs, ) + elif content is not None and content_blocks is not None: + super().__init__(content=content, content_blocks=content_blocks, **kwargs) else: super().__init__(content=content, **kwargs) diff --git a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py index 37b05ed825566..12cd42ddc322c 100644 --- a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py +++ b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py @@ -443,19 +443,11 @@ def test_trace_images_in_openai_format() -> None: ] tracer = FakeChatModelStartTracer() response = llm.invoke(messages, config={"callbacks": [tracer]}) - assert tracer.messages == [ - [ - [ - HumanMessage( - content=[ - { - "type": "image_url", - "image_url": {"url": "https://example.com/image.png"}, - } - ] - ) - ] - ] + assert tracer.messages[0][0][0].content == [ + { + "type": "image_url", + "image_url": {"url": "https://example.com/image.png"}, + } ] # Test no mutation assert response.content == [ @@ -486,23 +478,15 @@ def 
test_trace_content_blocks_with_no_type_key() -> None: ] tracer = FakeChatModelStartTracer() response = llm.invoke(messages, config={"callbacks": [tracer]}) - assert tracer.messages == [ - [ - [ - HumanMessage( - [ - { - "type": "text", - "text": "Hello", - }, - { - "type": "cachePoint", - "cachePoint": {"type": "default"}, - }, - ] - ) - ] - ] + assert tracer.messages[0][0][0].content == [ + { + "type": "text", + "text": "Hello", + }, + { + "type": "cachePoint", + "cachePoint": {"type": "default"}, + }, ] # Test no mutation assert response.content == [ diff --git a/libs/core/tests/unit_tests/test_messages.py b/libs/core/tests/unit_tests/test_messages.py index d6857aef32de2..ec17985c31baa 100644 --- a/libs/core/tests/unit_tests/test_messages.py +++ b/libs/core/tests/unit_tests/test_messages.py @@ -3,6 +3,7 @@ from typing import Optional, Union import pytest +from pydantic import ValidationError from typing_extensions import get_args from langchain_core.documents import Document @@ -1267,10 +1268,14 @@ def test_typed_init() -> None: # Test we get type errors for malformed blocks (type checker will complain if # below type-ignores are unused). - _ = AIMessage(content_blocks=[{"type": "text", "bad": "Hello"}]) # type: ignore[list-item] - _ = HumanMessage(content_blocks=[{"type": "text", "bad": "Hello"}]) # type: ignore[list-item] - _ = SystemMessage(content_blocks=[{"type": "text", "bad": "Hello"}]) # type: ignore[list-item] - _ = ToolMessage( - content_blocks=[{"type": "text", "bad": "Hello"}], # type: ignore[list-item] - tool_call_id="abc123", - ) + with pytest.raises(ValidationError): + _ = AIMessage(content_blocks=[{"type": "text", "bad": "Hello"}]) # type: ignore[list-item] + with pytest.raises(ValidationError): + _ = HumanMessage(content_blocks=[{"type": "text", "bad": "Hello"}]) # type: ignore[list-item] + with pytest.raises(ValidationError): + _ = SystemMessage(content_blocks=[{"type": "text", "bad": "Hello"}]) # type: ignore[list-item] + with pytest.raises(ValidationError): + _ = ToolMessage( + content_blocks=[{"type": "text", "bad": "Hello"}], # type: ignore[list-item] + tool_call_id="abc123", + ) From 98d5f469e37e2e9553483b450d6230546c8e0649 Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Tue, 12 Aug 2025 11:00:02 -0400 Subject: [PATCH 07/73] Revert "start on duplicate content" This reverts commit 0ddab9ff20faa3bee52892b9b2ce87c9316ef2e4. 
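
Reverting restores the property-based ``content_blocks`` accessor, which parses ``content`` on access and rejects unrecognized block types instead of wrapping them as ``non_standard``. A rough sketch of the restored behaviour (derived from the hunks below, not a verbatim contract):

.. code-block:: python

    from langchain_core.messages import HumanMessage

    msg = HumanMessage(content="Hello")
    # Best-effort parsing of string content into standard blocks on access.
    assert msg.content_blocks == [{"type": "text", "text": "Hello"}]

    msg = HumanMessage(content=[{"type": "providerSpecificBlock", "foo": "bar"}])
    try:
        _ = msg.content_blocks  # unknown block types raise
    except ValueError:
        pass
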
--- libs/core/langchain_core/messages/ai.py | 70 +++++++------------ libs/core/langchain_core/messages/base.py | 69 ++++++++---------- libs/core/langchain_core/messages/human.py | 7 +- libs/core/langchain_core/messages/system.py | 7 +- libs/core/langchain_core/messages/tool.py | 7 +- .../language_models/chat_models/test_base.py | 44 ++++++++---- libs/core/tests/unit_tests/test_messages.py | 19 ++--- 7 files changed, 94 insertions(+), 129 deletions(-) diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index 6983143b6fa6b..9d4a1e01879df 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -202,16 +202,11 @@ def __init__( **kwargs: Any, ) -> None: """Specify content as a positional arg or content_blocks for typing support.""" - if content is not None and content_blocks is None: - super().__init__(content=content, **kwargs) - elif content is None and content_blocks is not None: + if content_blocks is not None: super().__init__( content=cast("Union[str, list[Union[str, dict]]]", content_blocks), - content_blocks=content_blocks, **kwargs, ) - elif content is not None and content_blocks is not None: - super().__init__(content=content, content_blocks=content_blocks, **kwargs) else: super().__init__(content=content, **kwargs) @@ -223,45 +218,32 @@ def lc_attributes(self) -> dict: "invalid_tool_calls": self.invalid_tool_calls, } - @model_validator(mode="after") - def _init_content_blocks(self) -> Self: - """Assign the content as a list of standard ContentBlocks. - - To use this property, the corresponding chat model must support - ``message_version="v1"`` or higher: - - .. code-block:: python - - from langchain.chat_models import init_chat_model - llm = init_chat_model("...", message_version="v1") - - otherwise, does best-effort parsing to standard types. 
- """ - if not self.content_blocks: - self.content_blocks = self._init_text_content(self.content) - - if self.tool_calls or self.invalid_tool_calls: - # Add from tool_calls if missing from content - content_tool_call_ids = { - block.get("id") - for block in self.content_blocks - if isinstance(block, dict) and block.get("type") == "tool_call" - } - for tool_call in self.tool_calls: - if (id_ := tool_call.get("id")) and id_ not in content_tool_call_ids: - tool_call_block: types.ToolCall = { - "type": "tool_call", - "id": id_, - "name": tool_call["name"], - "args": tool_call["args"], - } - if "index" in tool_call: - tool_call_block["index"] = tool_call["index"] - if "extras" in tool_call: - tool_call_block["extras"] = tool_call["extras"] - self.content_blocks.append(tool_call_block) + @property + def content_blocks(self) -> list[types.ContentBlock]: + """Return content blocks of the message.""" + blocks = super().content_blocks + + # Add from tool_calls if missing from content + content_tool_call_ids = { + block.get("id") + for block in self.content + if isinstance(block, dict) and block.get("type") == "tool_call" + } + for tool_call in self.tool_calls: + if (id_ := tool_call.get("id")) and id_ not in content_tool_call_ids: + tool_call_block: types.ToolCall = { + "type": "tool_call", + "id": id_, + "name": tool_call["name"], + "args": tool_call["args"], + } + if "index" in tool_call: + tool_call_block["index"] = tool_call["index"] + if "extras" in tool_call: + tool_call_block["extras"] = tool_call["extras"] + blocks.append(tool_call_block) - return self + return blocks # TODO: remove this logic if possible, reducing breaking nature of changes @model_validator(mode="before") diff --git a/libs/core/langchain_core/messages/base.py b/libs/core/langchain_core/messages/base.py index ff88dd248eb18..13b12f764d19e 100644 --- a/libs/core/langchain_core/messages/base.py +++ b/libs/core/langchain_core/messages/base.py @@ -4,8 +4,7 @@ from typing import TYPE_CHECKING, Any, Optional, Union, cast, overload -from pydantic import ConfigDict, Field, model_validator -from typing_extensions import Self +from pydantic import ConfigDict, Field from langchain_core.load.serializable import Serializable from langchain_core.messages import content_blocks as types @@ -28,9 +27,6 @@ class BaseMessage(Serializable): content: Union[str, list[Union[str, dict]]] """The string contents of the message.""" - content_blocks: list[types.ContentBlock] = Field(default_factory=list) - """The content of the message as a list of standard ContentBlocks.""" - additional_kwargs: dict = Field(default_factory=dict) """Reserved for additional payload data associated with the message. 
@@ -88,14 +84,8 @@ def __init__( **kwargs: Any, ) -> None: """Specify content as a positional arg or content_blocks for typing support.""" - if content is not None and content_blocks is None: - super().__init__(content=content, **kwargs) - elif content is None and content_blocks is not None: - super().__init__( - content=content_blocks, content_blocks=content_blocks, **kwargs - ) - elif content is not None and content_blocks is not None: - super().__init__(content=content, content_blocks=content_blocks, **kwargs) + if content_blocks is not None: + super().__init__(content=content_blocks, **kwargs) else: super().__init__(content=content, **kwargs) @@ -116,30 +106,9 @@ def get_lc_namespace(cls) -> list[str]: """ return ["langchain", "schema", "messages"] - @staticmethod - def _init_text_content( - content: Union[str, list[Union[str, dict]]], - ) -> list[types.ContentBlock]: - """Parse string content into a list of ContentBlocks.""" - blocks: list[types.ContentBlock] = [] - content = [content] if isinstance(content, str) and content else content - for item in content: - if isinstance(item, str): - blocks.append({"type": "text", "text": item}) - elif isinstance(item, dict): - item_type = item.get("type") - if item_type not in types.KNOWN_BLOCK_TYPES: - blocks.append({"type": "non_standard", "value": item}) - else: - blocks.append(cast("types.ContentBlock", item)) - else: - pass - - return blocks - - @model_validator(mode="after") - def _init_content_blocks(self) -> Self: - """Assign the content as a list of standard ContentBlocks. + @property + def content_blocks(self) -> list[types.ContentBlock]: + """Return the content as a list of standard ContentBlocks. To use this property, the corresponding chat model must support ``message_version="v1"`` or higher: @@ -151,11 +120,29 @@ def _init_content_blocks(self) -> Self: otherwise, does best-effort parsing to standard types. """ - if not self.content_blocks: - blocks = self._init_text_content(self.content) - self.content_blocks = blocks + blocks: list[types.ContentBlock] = [] + content = ( + [self.content] + if isinstance(self.content, str) and self.content + else self.content + ) + for item in content: + if isinstance(item, str): + blocks.append({"type": "text", "text": item}) + elif isinstance(item, dict): + item_type = item.get("type") + if item_type not in types.KNOWN_BLOCK_TYPES: + msg = ( + f"Non-standard content block type '{item_type}'. Ensure " + "the model supports `output_version='v1'` or higher and " + "that this attribute is set on initialization." + ) + raise ValueError(msg) + blocks.append(cast("types.ContentBlock", item)) + else: + pass - return self + return blocks def text(self) -> str: """Get the text content of the message. 
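
The restored ``AIMessage.content_blocks`` property also surfaces tool calls that are missing from ``content``; a small sketch of that behaviour (illustrative only):

.. code-block:: python

    from langchain_core.messages import AIMessage

    msg = AIMessage(
        content="",
        tool_calls=[
            {
                "type": "tool_call",
                "id": "call_1",
                "name": "get_weather",
                "args": {"city": "SF"},
            }
        ],
    )
    # A tool_call block is appended for any tool call whose id is absent from `content`.
    assert msg.content_blocks == [
        {
            "type": "tool_call",
            "id": "call_1",
            "name": "get_weather",
            "args": {"city": "SF"},
        }
    ]
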
diff --git a/libs/core/langchain_core/messages/human.py b/libs/core/langchain_core/messages/human.py index 5383bb964d4fc..041db0cdb9726 100644 --- a/libs/core/langchain_core/messages/human.py +++ b/libs/core/langchain_core/messages/human.py @@ -64,16 +64,11 @@ def __init__( **kwargs: Any, ) -> None: """Specify content as a positional arg or content_blocks for typing support.""" - if content is not None and content_blocks is None: - super().__init__(content=content, **kwargs) - elif content is None and content_blocks is not None: + if content_blocks is not None: super().__init__( content=cast("Union[str, list[Union[str, dict]]]", content_blocks), - content_blocks=content_blocks, **kwargs, ) - elif content is not None and content_blocks is not None: - super().__init__(content=content, content_blocks=content_blocks, **kwargs) else: super().__init__(content=content, **kwargs) diff --git a/libs/core/langchain_core/messages/system.py b/libs/core/langchain_core/messages/system.py index 976ea55d15685..c8a5bbae5c859 100644 --- a/libs/core/langchain_core/messages/system.py +++ b/libs/core/langchain_core/messages/system.py @@ -57,16 +57,11 @@ def __init__( **kwargs: Any, ) -> None: """Specify content as a positional arg or content_blocks for typing support.""" - if content is not None and content_blocks is None: - super().__init__(content=content, **kwargs) - elif content is None and content_blocks is not None: + if content_blocks is not None: super().__init__( content=cast("Union[str, list[Union[str, dict]]]", content_blocks), - content_blocks=content_blocks, **kwargs, ) - elif content is not None and content_blocks is not None: - super().__init__(content=content, content_blocks=content_blocks, **kwargs) else: super().__init__(content=content, **kwargs) diff --git a/libs/core/langchain_core/messages/tool.py b/libs/core/langchain_core/messages/tool.py index 78b4f14c3a953..14177181480fc 100644 --- a/libs/core/langchain_core/messages/tool.py +++ b/libs/core/langchain_core/messages/tool.py @@ -158,16 +158,11 @@ def __init__( **kwargs: Any, ) -> None: """Specify content as a positional arg or content_blocks for typing support.""" - if content is not None and content_blocks is None: - super().__init__(content=content, **kwargs) - elif content is None and content_blocks is not None: + if content_blocks is not None: super().__init__( content=cast("Union[str, list[Union[str, dict]]]", content_blocks), - content_blocks=content_blocks, **kwargs, ) - elif content is not None and content_blocks is not None: - super().__init__(content=content, content_blocks=content_blocks, **kwargs) else: super().__init__(content=content, **kwargs) diff --git a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py index 12cd42ddc322c..37b05ed825566 100644 --- a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py +++ b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py @@ -443,11 +443,19 @@ def test_trace_images_in_openai_format() -> None: ] tracer = FakeChatModelStartTracer() response = llm.invoke(messages, config={"callbacks": [tracer]}) - assert tracer.messages[0][0][0].content == [ - { - "type": "image_url", - "image_url": {"url": "https://example.com/image.png"}, - } + assert tracer.messages == [ + [ + [ + HumanMessage( + content=[ + { + "type": "image_url", + "image_url": {"url": "https://example.com/image.png"}, + } + ] + ) + ] + ] ] # Test no mutation assert response.content == [ @@ -478,15 +486,23 @@ def 
test_trace_content_blocks_with_no_type_key() -> None: ] tracer = FakeChatModelStartTracer() response = llm.invoke(messages, config={"callbacks": [tracer]}) - assert tracer.messages[0][0][0].content == [ - { - "type": "text", - "text": "Hello", - }, - { - "type": "cachePoint", - "cachePoint": {"type": "default"}, - }, + assert tracer.messages == [ + [ + [ + HumanMessage( + [ + { + "type": "text", + "text": "Hello", + }, + { + "type": "cachePoint", + "cachePoint": {"type": "default"}, + }, + ] + ) + ] + ] ] # Test no mutation assert response.content == [ diff --git a/libs/core/tests/unit_tests/test_messages.py b/libs/core/tests/unit_tests/test_messages.py index ec17985c31baa..d6857aef32de2 100644 --- a/libs/core/tests/unit_tests/test_messages.py +++ b/libs/core/tests/unit_tests/test_messages.py @@ -3,7 +3,6 @@ from typing import Optional, Union import pytest -from pydantic import ValidationError from typing_extensions import get_args from langchain_core.documents import Document @@ -1268,14 +1267,10 @@ def test_typed_init() -> None: # Test we get type errors for malformed blocks (type checker will complain if # below type-ignores are unused). - with pytest.raises(ValidationError): - _ = AIMessage(content_blocks=[{"type": "text", "bad": "Hello"}]) # type: ignore[list-item] - with pytest.raises(ValidationError): - _ = HumanMessage(content_blocks=[{"type": "text", "bad": "Hello"}]) # type: ignore[list-item] - with pytest.raises(ValidationError): - _ = SystemMessage(content_blocks=[{"type": "text", "bad": "Hello"}]) # type: ignore[list-item] - with pytest.raises(ValidationError): - _ = ToolMessage( - content_blocks=[{"type": "text", "bad": "Hello"}], # type: ignore[list-item] - tool_call_id="abc123", - ) + _ = AIMessage(content_blocks=[{"type": "text", "bad": "Hello"}]) # type: ignore[list-item] + _ = HumanMessage(content_blocks=[{"type": "text", "bad": "Hello"}]) # type: ignore[list-item] + _ = SystemMessage(content_blocks=[{"type": "text", "bad": "Hello"}]) # type: ignore[list-item] + _ = ToolMessage( + content_blocks=[{"type": "text", "bad": "Hello"}], # type: ignore[list-item] + tool_call_id="abc123", + ) From 6eaa17205ccc40f80c5e544efa8cc36875e635da Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Tue, 12 Aug 2025 15:04:21 -0400 Subject: [PATCH 08/73] implement output_version on BaseChatModel --- .../langchain_core/language_models/chat_models.py | 14 ++++++++++++++ .../language_models/chat_models/test_cache.py | 5 +++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py index 06838e3b07592..1eb32ea1d66af 100644 --- a/libs/core/langchain_core/language_models/chat_models.py +++ b/libs/core/langchain_core/language_models/chat_models.py @@ -334,6 +334,20 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC): """ + output_version: str = "v0" + """Version of AIMessage output format to use. + + This field is used to roll-out new output formats for chat model AIMessages + in a backwards-compatible way. + + ``'v1'`` standardizes output format using a list of typed ContentBlock dicts. We + recommend this for new applications. + + All chat models currently support the default of ``"v0"``. + + .. 
versionadded:: 1.0 + """ + @model_validator(mode="before") @classmethod def raise_deprecation(cls, values: dict) -> Any: diff --git a/libs/core/tests/unit_tests/language_models/chat_models/test_cache.py b/libs/core/tests/unit_tests/language_models/chat_models/test_cache.py index 39e4babc7821f..7cf428bb3ab15 100644 --- a/libs/core/tests/unit_tests/language_models/chat_models/test_cache.py +++ b/libs/core/tests/unit_tests/language_models/chat_models/test_cache.py @@ -301,8 +301,9 @@ def test_llm_representation_for_serializable() -> None: assert chat._get_llm_string() == ( '{"id": ["tests", "unit_tests", "language_models", "chat_models", ' '"test_cache", "CustomChat"], "kwargs": {"messages": {"id": ' - '["builtins", "list_iterator"], "lc": 1, "type": "not_implemented"}}, "lc": ' - '1, "name": "CustomChat", "type": "constructor"}---[(\'stop\', None)]' + '["builtins", "list_iterator"], "lc": 1, "type": "not_implemented"}, ' + '"output_version": "v0"}, "lc": 1, "name": "CustomChat", "type": ' + "\"constructor\"}---[('stop', None)]" ) From 3ae7535f42c54908b0d5508878854aebbbc8186b Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Tue, 12 Aug 2025 15:15:57 -0400 Subject: [PATCH 09/73] openai: pull in _compat from 0.4 branch --- .../langchain_openai/chat_models/_compat.py | 468 +++++++++++++++++- 1 file changed, 460 insertions(+), 8 deletions(-) diff --git a/libs/partners/openai/langchain_openai/chat_models/_compat.py b/libs/partners/openai/langchain_openai/chat_models/_compat.py index 25ff3eb607cc3..00f3b365c9b38 100644 --- a/libs/partners/openai/langchain_openai/chat_models/_compat.py +++ b/libs/partners/openai/langchain_openai/chat_models/_compat.py @@ -1,7 +1,10 @@ """ -This module converts between AIMessage output formats for the Responses API. +This module converts between AIMessage output formats, which are governed by the +``output_version`` attribute on ChatOpenAI. Supported values are ``"v0"`` and +``"responses/v1"``. -ChatOpenAI v0.3 stores reasoning and tool outputs in AIMessage.additional_kwargs: +``"v0"`` corresponds to the format as of ChatOpenAI v0.3. For the Responses API, it +stores reasoning and tool outputs in AIMessage.additional_kwargs: .. code-block:: python @@ -28,8 +31,9 @@ id="msg_123", ) -To retain information about response item sequencing (and to accommodate multiple -reasoning items), ChatOpenAI now stores these items in the content sequence: +``"responses/v1"`` is only applicable to the Responses API. It retains information +about response item sequencing and accommodates multiple reasoning items by +representing these items in the content sequence: .. code-block:: python @@ -57,18 +61,22 @@ content blocks, rather than on the AIMessage.id, which now stores the response ID. For backwards compatibility, this module provides functions to convert between the -old and new formats. The functions are used internally by ChatOpenAI. - +formats. The functions are used internally by ChatOpenAI. 
""" # noqa: E501 +import copy import json -from typing import Union +from collections.abc import Iterable, Iterator +from typing import Any, Literal, Optional, Union, cast -from langchain_core.messages import AIMessage +from langchain_core.messages import AIMessage, is_data_content_block +from langchain_core.messages import content_blocks as types +from langchain_core.v1.messages import AIMessage as AIMessageV1 _FUNCTION_CALL_IDS_MAP_KEY = "__openai_function_call_ids__" +# v0.3 / Responses def _convert_to_v03_ai_message( message: AIMessage, has_reasoning: bool = False ) -> AIMessage: @@ -253,3 +261,447 @@ def _convert_from_v03_ai_message(message: AIMessage) -> AIMessage: }, deep=False, ) + + +# v1 / Chat Completions +def _convert_from_v1_to_chat_completions(message: AIMessageV1) -> AIMessageV1: + """Convert a v1 message to the Chat Completions format.""" + new_content: list[types.ContentBlock] = [] + for block in message.content: + if block["type"] == "text": + # Strip annotations + new_content.append({"type": "text", "text": block["text"]}) + elif block["type"] in ("reasoning", "tool_call"): + pass + else: + new_content.append(block) + new_message = copy.copy(message) + new_message.content = new_content + + return new_message + + +# v1 / Responses +def _convert_annotation_to_v1(annotation: dict[str, Any]) -> types.Annotation: + annotation_type = annotation.get("type") + + if annotation_type == "url_citation": + known_fields = { + "type", + "url", + "title", + "cited_text", + "start_index", + "end_index", + } + url_citation = cast(types.Citation, {}) + for field in ("end_index", "start_index", "title"): + if field in annotation: + url_citation[field] = annotation[field] + url_citation["type"] = "citation" + url_citation["url"] = annotation["url"] + for field in annotation: + if field not in known_fields: + if "extras" not in url_citation: + url_citation["extras"] = {} + url_citation["extras"][field] = annotation[field] + return url_citation + + elif annotation_type == "file_citation": + known_fields = {"type", "title", "cited_text", "start_index", "end_index"} + document_citation: types.Citation = {"type": "citation"} + if "filename" in annotation: + document_citation["title"] = annotation.pop("filename") + for field in annotation: + if field not in known_fields: + if "extras" not in document_citation: + document_citation["extras"] = {} + document_citation["extras"][field] = annotation[field] + + return document_citation + + # TODO: standardise container_file_citation? 
+ else: + non_standard_annotation: types.NonStandardAnnotation = { + "type": "non_standard_annotation", + "value": annotation, + } + return non_standard_annotation + + +def _explode_reasoning(block: dict[str, Any]) -> Iterable[types.ReasoningContentBlock]: + if "summary" not in block: + yield cast(types.ReasoningContentBlock, block) + return + + known_fields = {"type", "reasoning", "id", "index"} + unknown_fields = [ + field for field in block if field != "summary" and field not in known_fields + ] + if unknown_fields: + block["extras"] = {} + for field in unknown_fields: + block["extras"][field] = block.pop(field) + + if not block["summary"]: + _ = block.pop("summary", None) + yield cast(types.ReasoningContentBlock, block) + return + + # Common part for every exploded line, except 'summary' + common = {k: v for k, v in block.items() if k in known_fields} + + # Optional keys that must appear only in the first exploded item + first_only = block.pop("extras", None) + + for idx, part in enumerate(block["summary"]): + new_block = dict(common) + new_block["reasoning"] = part.get("text", "") + if idx == 0 and first_only: + new_block.update(first_only) + yield cast(types.ReasoningContentBlock, new_block) + + +def _convert_to_v1_from_responses( + content: list[dict[str, Any]], + tool_calls: Optional[list[types.ToolCall]] = None, + invalid_tool_calls: Optional[list[types.InvalidToolCall]] = None, +) -> list[types.ContentBlock]: + """Mutate a Responses message to v1 format.""" + + def _iter_blocks() -> Iterable[types.ContentBlock]: + for block in content: + if not isinstance(block, dict): + continue + block_type = block.get("type") + + if block_type == "text": + if "annotations" in block: + block["annotations"] = [ + _convert_annotation_to_v1(a) for a in block["annotations"] + ] + yield cast(types.TextContentBlock, block) + + elif block_type == "reasoning": + yield from _explode_reasoning(block) + + elif block_type == "image_generation_call" and ( + result := block.get("result") + ): + new_block = {"type": "image", "base64": result} + if output_format := block.get("output_format"): + new_block["mime_type"] = f"image/{output_format}" + if "id" in block: + new_block["id"] = block["id"] + if "index" in block: + new_block["index"] = block["index"] + for extra_key in ( + "status", + "background", + "output_format", + "quality", + "revised_prompt", + "size", + ): + if extra_key in block: + new_block[extra_key] = block[extra_key] + yield cast(types.ImageContentBlock, new_block) + + elif block_type == "function_call": + tool_call_block: Optional[types.ContentBlock] = None + call_id = block.get("call_id", "") + if call_id: + for tool_call in tool_calls or []: + if tool_call.get("id") == call_id: + tool_call_block = cast(types.ToolCall, tool_call.copy()) + break + else: + for invalid_tool_call in invalid_tool_calls or []: + if invalid_tool_call.get("id") == call_id: + tool_call_block = cast( + types.InvalidToolCall, invalid_tool_call.copy() + ) + break + if tool_call_block: + if "id" in block: + if "extras" not in tool_call_block: + tool_call_block["extras"] = {} + tool_call_block["extras"]["item_id"] = block["id"] # type: ignore[typeddict-item] + if "index" in block: + tool_call_block["index"] = block["index"] + yield tool_call_block + + elif block_type == "web_search_call": + web_search_call = {"type": "web_search_call", "id": block["id"]} + if "index" in block: + web_search_call["index"] = block["index"] + if ( + "action" in block + and isinstance(block["action"], dict) + and block["action"].get("type") == 
"search" + and "query" in block["action"] + ): + web_search_call["query"] = block["action"]["query"] + for key in block: + if key not in ("type", "id"): + web_search_call[key] = block[key] + + web_search_result = {"type": "web_search_result", "id": block["id"]} + if "index" in block: + web_search_result["index"] = block["index"] + 1 + yield cast(types.WebSearchCall, web_search_call) + yield cast(types.WebSearchResult, web_search_result) + + elif block_type == "code_interpreter_call": + code_interpreter_call = { + "type": "code_interpreter_call", + "id": block["id"], + } + if "code" in block: + code_interpreter_call["code"] = block["code"] + if "container_id" in block: + code_interpreter_call["container_id"] = block["container_id"] + if "index" in block: + code_interpreter_call["index"] = block["index"] + + code_interpreter_result = { + "type": "code_interpreter_result", + "id": block["id"], + } + if "outputs" in block: + code_interpreter_result["outputs"] = block["outputs"] + for output in block["outputs"]: + if ( + isinstance(output, dict) + and (output_type := output.get("type")) + and output_type == "logs" + ): + if "output" not in code_interpreter_result: + code_interpreter_result["output"] = [] + code_interpreter_result["output"].append( + { + "type": "code_interpreter_output", + "stdout": output.get("logs", ""), + } + ) + + if "status" in block: + code_interpreter_result["status"] = block["status"] + if "index" in block: + code_interpreter_result["index"] = block["index"] + 1 + + yield cast(types.CodeInterpreterCall, code_interpreter_call) + yield cast(types.CodeInterpreterResult, code_interpreter_result) + + else: + new_block = {"type": "non_standard", "value": block} + if "index" in new_block["value"]: + new_block["index"] = new_block["value"].pop("index") + yield cast(types.NonStandardContentBlock, new_block) + + return list(_iter_blocks()) + + +def _convert_annotation_from_v1(annotation: types.Annotation) -> dict[str, Any]: + if annotation["type"] == "citation": + new_ann: dict[str, Any] = {} + for field in ("end_index", "start_index"): + if field in annotation: + new_ann[field] = annotation[field] + + if "url" in annotation: + # URL citation + if "title" in annotation: + new_ann["title"] = annotation["title"] + new_ann["type"] = "url_citation" + new_ann["url"] = annotation["url"] + else: + # Document citation + new_ann["type"] = "file_citation" + if "title" in annotation: + new_ann["filename"] = annotation["title"] + + if extra_fields := annotation.get("extras"): + for field, value in extra_fields.items(): + new_ann[field] = value + + return new_ann + + elif annotation["type"] == "non_standard_annotation": + return annotation["value"] + + else: + return dict(annotation) + + +def _implode_reasoning_blocks(blocks: list[dict[str, Any]]) -> Iterable[dict[str, Any]]: + i = 0 + n = len(blocks) + + while i < n: + block = blocks[i] + + # Skip non-reasoning blocks or blocks already in Responses format + if block.get("type") != "reasoning" or "summary" in block: + yield dict(block) + i += 1 + continue + elif "reasoning" not in block and "summary" not in block: + # {"type": "reasoning", "id": "rs_..."} + oai_format = {**block, "summary": []} + if "extras" in oai_format: + oai_format.update(oai_format.pop("extras")) + oai_format["type"] = oai_format.pop("type", "reasoning") + if "encrypted_content" in oai_format: + oai_format["encrypted_content"] = oai_format.pop("encrypted_content") + yield oai_format + i += 1 + continue + else: + pass + + summary: list[dict[str, str]] = [ + {"type": 
"summary_text", "text": block.get("reasoning", "")} + ] + # 'common' is every field except the exploded 'reasoning' + common = {k: v for k, v in block.items() if k != "reasoning"} + if "extras" in common: + common.update(common.pop("extras")) + + i += 1 + while i < n: + next_ = blocks[i] + if next_.get("type") == "reasoning" and "reasoning" in next_: + summary.append( + {"type": "summary_text", "text": next_.get("reasoning", "")} + ) + i += 1 + else: + break + + merged = dict(common) + merged["summary"] = summary + merged["type"] = merged.pop("type", "reasoning") + yield merged + + +def _consolidate_calls( + items: Iterable[dict[str, Any]], + call_name: Literal["web_search_call", "code_interpreter_call"], + result_name: Literal["web_search_result", "code_interpreter_result"], +) -> Iterator[dict[str, Any]]: + """ + Generator that walks through *items* and, whenever it meets the pair + + {"type": "web_search_call", "id": X, ...} + {"type": "web_search_result", "id": X} + + merges them into + + {"id": X, + "action": …, + "status": …, + "type": "web_search_call"} + + keeping every other element untouched. + """ + items = iter(items) # make sure we have a true iterator + for current in items: + # Only a call can start a pair worth collapsing + if current.get("type") != call_name: + yield current + continue + + try: + nxt = next(items) # look-ahead one element + except StopIteration: # no “result” – just yield the call back + yield current + break + + # If this really is the matching “result” – collapse + if nxt.get("type") == result_name and nxt.get("id") == current.get("id"): + if call_name == "web_search_call": + collapsed = {"id": current["id"]} + if "action" in current: + collapsed["action"] = current["action"] + collapsed["status"] = current["status"] + collapsed["type"] = "web_search_call" + + if call_name == "code_interpreter_call": + collapsed = {"id": current["id"]} + for key in ("code", "container_id"): + if key in current: + collapsed[key] = current[key] + + for key in ("outputs", "status"): + if key in nxt: + collapsed[key] = nxt[key] + collapsed["type"] = "code_interpreter_call" + + yield collapsed + + else: + # Not a matching pair – emit both, in original order + yield current + yield nxt + + +def _convert_from_v1_to_responses( + content: list[types.ContentBlock], tool_calls: list[types.ToolCall] +) -> list[dict[str, Any]]: + new_content: list = [] + for block in content: + if block["type"] == "text" and "annotations" in block: + # Need a copy because we’re changing the annotations list + new_block = dict(block) + new_block["annotations"] = [ + _convert_annotation_from_v1(a) for a in block["annotations"] + ] + new_content.append(new_block) + elif block["type"] == "tool_call": + new_block = {"type": "function_call", "call_id": block["id"]} + if "extras" in block and "item_id" in block["extras"]: + new_block["id"] = block["extras"]["item_id"] + if "name" in block: + new_block["name"] = block["name"] + if "extras" in block and "arguments" in block["extras"]: + new_block["arguments"] = block["extras"]["arguments"] + if any(key not in block for key in ("name", "arguments")): + matching_tool_calls = [ + call for call in tool_calls if call["id"] == block["id"] + ] + if matching_tool_calls: + tool_call = matching_tool_calls[0] + if "name" not in block: + new_block["name"] = tool_call["name"] + if "arguments" not in block: + new_block["arguments"] = json.dumps(tool_call["args"]) + new_content.append(new_block) + elif ( + is_data_content_block(cast(dict, block)) + and block["type"] == 
"image" + and "base64" in block + and isinstance(block.get("id"), str) + and block["id"].startswith("ig_") + ): + new_block = {"type": "image_generation_call", "result": block["base64"]} + for extra_key in ("id", "status"): + if extra_key in block: + new_block[extra_key] = block[extra_key] # type: ignore[typeddict-item] + new_content.append(new_block) + elif block["type"] == "non_standard" and "value" in block: + new_content.append(block["value"]) + else: + new_content.append(block) + + new_content = list(_implode_reasoning_blocks(new_content)) + new_content = list( + _consolidate_calls(new_content, "web_search_call", "web_search_result") + ) + new_content = list( + _consolidate_calls( + new_content, "code_interpreter_call", "code_interpreter_result" + ) + ) + + return new_content From c1d65a7d7f17775d1b5a63e78d9ed317c317659c Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Tue, 12 Aug 2025 18:00:14 -0400 Subject: [PATCH 10/73] x --- .../langchain_openai/chat_models/_compat.py | 64 +++-- .../langchain_openai/chat_models/base.py | 143 ++++++++-- .../tests/unit_tests/chat_models/test_base.py | 259 +++++++++++++++++- .../chat_models/test_responses_stream.py | 128 +++++++-- 4 files changed, 529 insertions(+), 65 deletions(-) diff --git a/libs/partners/openai/langchain_openai/chat_models/_compat.py b/libs/partners/openai/langchain_openai/chat_models/_compat.py index 00f3b365c9b38..f3431f1651499 100644 --- a/libs/partners/openai/langchain_openai/chat_models/_compat.py +++ b/libs/partners/openai/langchain_openai/chat_models/_compat.py @@ -64,14 +64,12 @@ formats. The functions are used internally by ChatOpenAI. """ # noqa: E501 -import copy import json from collections.abc import Iterable, Iterator from typing import Any, Literal, Optional, Union, cast -from langchain_core.messages import AIMessage, is_data_content_block +from langchain_core.messages import AIMessage, AIMessageChunk, is_data_content_block from langchain_core.messages import content_blocks as types -from langchain_core.v1.messages import AIMessage as AIMessageV1 _FUNCTION_CALL_IDS_MAP_KEY = "__openai_function_call_ids__" @@ -264,21 +262,51 @@ def _convert_from_v03_ai_message(message: AIMessage) -> AIMessage: # v1 / Chat Completions -def _convert_from_v1_to_chat_completions(message: AIMessageV1) -> AIMessageV1: - """Convert a v1 message to the Chat Completions format.""" - new_content: list[types.ContentBlock] = [] - for block in message.content: - if block["type"] == "text": - # Strip annotations - new_content.append({"type": "text", "text": block["text"]}) - elif block["type"] in ("reasoning", "tool_call"): - pass +def _convert_to_v1_from_chat_completions(message: AIMessage) -> AIMessage: + """Mutate a Chat Completions message to v1 format.""" + if isinstance(message.content, str): + if message.content: + message.content = [{"type": "text", "text": message.content}] else: - new_content.append(block) - new_message = copy.copy(message) - new_message.content = new_content + message.content = [] + + for tool_call in message.tool_calls: + if id_ := tool_call.get("id"): + message.content.append({"type": "tool_call", "id": id_}) + + if "tool_calls" in message.additional_kwargs: + _ = message.additional_kwargs.pop("tool_calls") + + if "token_usage" in message.response_metadata: + _ = message.response_metadata.pop("token_usage") + + return message + + +def _convert_to_v1_from_chat_completions_chunk(chunk: AIMessageChunk) -> AIMessageChunk: + result = _convert_to_v1_from_chat_completions(cast(AIMessage, chunk)) + return 
cast(AIMessageChunk, result) - return new_message + +def _convert_from_v1_to_chat_completions(message: AIMessage) -> AIMessage: + """Convert a v1 message to the Chat Completions format.""" + if isinstance(message.content, list): + new_content: list = [] + for block in message.content: + if isinstance(block, dict): + block_type = block.get("type") + if block_type == "text": + # Strip annotations + new_content.append({"type": "text", "text": block["text"]}) + elif block_type in ("reasoning", "tool_call"): + pass + else: + new_content.append(block) + else: + new_content.append(block) + return message.model_copy(update={"content": new_content}) + + return message # v1 / Responses @@ -408,7 +436,9 @@ def _iter_blocks() -> Iterable[types.ContentBlock]: yield cast(types.ImageContentBlock, new_block) elif block_type == "function_call": - tool_call_block: Optional[types.ContentBlock] = None + tool_call_block: Optional[ + Union[types.ToolCall, types.InvalidToolCall] + ] = None call_id = block.get("call_id", "") if call_id: for tool_call in tool_calls or []: diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index a8702359b36e6..947073b0e6a29 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -108,7 +108,12 @@ ) from langchain_openai.chat_models._compat import ( _convert_from_v03_ai_message, + _convert_from_v1_to_chat_completions, + _convert_from_v1_to_responses, _convert_to_v03_ai_message, + _convert_to_v1_from_chat_completions, + _convert_to_v1_from_chat_completions_chunk, + _convert_to_v1_from_responses, ) if TYPE_CHECKING: @@ -681,7 +686,7 @@ class BaseChatOpenAI(BaseChatModel): .. versionadded:: 0.3.9 """ - output_version: Literal["v0", "responses/v1"] = "v0" + output_version: str = "v0" """Version of AIMessage output format to use. This field is used to roll-out new output formats for chat model AIMessages @@ -692,8 +697,9 @@ class BaseChatOpenAI(BaseChatModel): - ``'v0'``: AIMessage format as of langchain-openai 0.3.x. - ``'responses/v1'``: Formats Responses API output items into AIMessage content blocks. + - ``"v1"``: v1 of LangChain cross-provider standard. - Currently only impacts the Responses API. ``output_version='responses/v1'`` is + Currently only impacts the Responses API. ``output_version='v1'`` is recommended. .. 
versionadded:: 0.3.25 @@ -896,6 +902,10 @@ def _convert_chunk_to_generation_chunk( message=default_chunk_class(content="", usage_metadata=usage_metadata), generation_info=base_generation_info, ) + if self.output_version == "v1": + generation_chunk.message = _convert_to_v1_from_chat_completions_chunk( + cast(AIMessageChunk, generation_chunk.message) + ) return generation_chunk choice = choices[0] @@ -923,6 +933,20 @@ def _convert_chunk_to_generation_chunk( if usage_metadata and isinstance(message_chunk, AIMessageChunk): message_chunk.usage_metadata = usage_metadata + if self.output_version == "v1": + message_chunk = cast(AIMessageChunk, message_chunk) + # Convert to v1 format + if isinstance(message_chunk.content, str): + message_chunk = _convert_to_v1_from_chat_completions_chunk( + message_chunk + ) + if message_chunk.content: + message_chunk.content[0]["index"] = 0 # type: ignore[index] + else: + message_chunk = _convert_to_v1_from_chat_completions_chunk( + message_chunk + ) + generation_chunk = ChatGenerationChunk( message=message_chunk, generation_info=generation_info or None ) @@ -1216,7 +1240,12 @@ def _get_request_payload( else: payload = _construct_responses_api_payload(messages, payload) else: - payload["messages"] = [_convert_message_to_dict(m) for m in messages] + payload["messages"] = [ + _convert_message_to_dict(_convert_from_v1_to_chat_completions(m)) + if isinstance(m, AIMessage) + else _convert_message_to_dict(m) + for m in messages + ] return payload def _create_chat_result( @@ -1282,6 +1311,12 @@ def _create_chat_result( if hasattr(message, "refusal"): generations[0].message.additional_kwargs["refusal"] = message.refusal + if self.output_version == "v1": + _ = llm_output.pop("token_usage", None) + generations[0].message = _convert_to_v1_from_chat_completions( + cast(AIMessage, generations[0].message) + ) + return ChatResult(generations=generations, llm_output=llm_output) async def _astream( @@ -3660,6 +3695,10 @@ def _construct_responses_api_input(messages: Sequence[BaseMessage]) -> list: if isinstance(lc_msg, AIMessage): lc_msg = _convert_from_v03_ai_message(lc_msg) msg = _convert_message_to_dict(lc_msg) + if isinstance(lc_msg, AIMessage): + msg["content"] = _convert_from_v1_to_responses( + msg["content"], lc_msg.tool_calls + ) # "name" parameter unsupported if "name" in msg: msg.pop("name") @@ -3821,7 +3860,7 @@ def _construct_lc_result_from_responses_api( response: Response, schema: Optional[type[_BM]] = None, metadata: Optional[dict] = None, - output_version: Literal["v0", "responses/v1"] = "v0", + output_version: str = "v0", ) -> ChatResult: """Construct ChatResponse from OpenAI Response API response.""" if response.error: @@ -3959,6 +3998,30 @@ def _construct_lc_result_from_responses_api( additional_kwargs["parsed"] = parsed except json.JSONDecodeError: pass + + if output_version == "v1": + content_blocks = _convert_to_v1_from_responses(content_blocks) + + if response.tools and any( + tool.type == "image_generation" for tool in response.tools + ): + # Get mime_time from tool definition and add to image generations + # if missing (primarily for tracing purposes). 
+            image_generation_call = next(
+                tool for tool in response.tools if tool.type == "image_generation"
+            )
+            if image_generation_call.output_format:
+                mime_type = f"image/{image_generation_call.output_format}"
+                for content_block in content_blocks:
+                    # OK to mutate output message
+                    if (
+                        isinstance(content_block, dict)
+                        and content_block.get("type") == "image"
+                        and "base64" in content_block
+                        and "mime_type" not in content_block
+                    ):
+                        content_block["mime_type"] = mime_type
+
     message = AIMessage(
         content=content_blocks,
         id=response.id,
@@ -3983,7 +4046,7 @@ def _convert_responses_chunk_to_generation_chunk(
     schema: Optional[type[_BM]] = None,
     metadata: Optional[dict] = None,
     has_reasoning: bool = False,
-    output_version: Literal["v0", "responses/v1"] = "v0",
+    output_version: str = "v0",
 ) -> tuple[int, int, int, Optional[ChatGenerationChunk]]:
     def _advance(output_idx: int, sub_idx: Optional[int] = None) -> None:
         """Advance indexes tracked during streaming.
@@ -4049,9 +4112,29 @@ def _advance(output_idx: int, sub_idx: Optional[int] = None) -> None:
             annotation = chunk.annotation
         else:
             annotation = chunk.annotation.model_dump(exclude_none=True, mode="json")
-        content.append({"annotations": [annotation], "index": current_index})
+        if output_version == "v1":
+            content.append(
+                {
+                    "type": "text",
+                    "text": "",
+                    "annotations": [annotation],
+                    "index": current_index,
+                }
+            )
+        else:
+            content.append({"annotations": [annotation], "index": current_index})
     elif chunk.type == "response.output_text.done":
-        content.append({"id": chunk.item_id, "index": current_index})
+        if output_version == "v1":
+            content.append(
+                {
+                    "type": "text",
+                    "text": "",
+                    "id": chunk.item_id,
+                    "index": current_index,
+                }
+            )
+        else:
+            content.append({"id": chunk.item_id, "index": current_index})
     elif chunk.type == "response.created":
         id = chunk.response.id
         response_metadata["id"] = chunk.response.id  # Backwards compatibility
@@ -4144,21 +4227,35 @@ def _advance(output_idx: int, sub_idx: Optional[int] = None) -> None:
         content.append({"type": "refusal", "refusal": chunk.refusal})
     elif chunk.type == "response.output_item.added" and chunk.item.type == "reasoning":
         _advance(chunk.output_index)
+        current_sub_index = 0
         reasoning = chunk.item.model_dump(exclude_none=True, mode="json")
         reasoning["index"] = current_index
         content.append(reasoning)
     elif chunk.type == "response.reasoning_summary_part.added":
-        _advance(chunk.output_index)
-        content.append(
-            {
-                # langchain-core uses the `index` key to aggregate text blocks.
-                "summary": [
-                    {"index": chunk.summary_index, "type": "summary_text", "text": ""}
-                ],
-                "index": current_index,
-                "type": "reasoning",
-            }
-        )
+        if output_version in ("v0", "responses/v1"):
+            _advance(chunk.output_index)
+            content.append(
+                {
+                    # langchain-core uses the `index` key to aggregate text blocks.
+                    "summary": [
+                        {
+                            "index": chunk.summary_index,
+                            "type": "summary_text",
+                            "text": "",
+                        }
+                    ],
+                    "index": current_index,
+                    "type": "reasoning",
+                }
+            )
+        else:
+            # v1
+            block: dict = {"type": "reasoning", "reasoning": ""}
+            if chunk.summary_index > 0:
+                _advance(chunk.output_index, chunk.summary_index)
+            block["id"] = chunk.item_id
+            block["index"] = current_index
+            content.append(block)
    elif chunk.type == "response.image_generation_call.partial_image":
        # Partial images are not supported yet.
pass @@ -4180,6 +4277,16 @@ def _advance(output_idx: int, sub_idx: Optional[int] = None) -> None: else: return current_index, current_output_index, current_sub_index, None + if output_version == "v1": + content = cast(AIMessageChunk, _convert_to_v1_from_responses(content)) + for content_block in content: + if ( + isinstance(content_block, dict) + and content_block.get("index", -1) > current_index + ): + # blocks were added for v1 + current_index = content_block["index"] + message = AIMessageChunk( content=content, # type: ignore[arg-type] tool_call_chunks=tool_call_chunks, diff --git a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py index 731857906020f..6713f8c967b7b 100644 --- a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py +++ b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py @@ -20,11 +20,13 @@ ToolCall, ToolMessage, ) +from langchain_core.messages import content_blocks as types from langchain_core.messages.ai import UsageMetadata from langchain_core.outputs import ChatGeneration, ChatResult from langchain_core.runnables import RunnableLambda from langchain_core.tracers.base import BaseTracer from langchain_core.tracers.schemas import Run +from langchain_core.v1.messages import AIMessage as AIMessageV1 from openai.types.responses import ResponseOutputMessage, ResponseReasoningItem from openai.types.responses.response import IncompleteDetails, Response, ResponseUsage from openai.types.responses.response_error import ResponseError @@ -51,7 +53,10 @@ from langchain_openai.chat_models._compat import ( _FUNCTION_CALL_IDS_MAP_KEY, _convert_from_v03_ai_message, + _convert_from_v1_to_chat_completions, + _convert_from_v1_to_responses, _convert_to_v03_ai_message, + _convert_to_v1_from_responses, ) from langchain_openai.chat_models.base import ( _construct_lc_result_from_responses_api, @@ -2373,7 +2378,7 @@ def mock_create(*args: Any, **kwargs: Any) -> Response: assert payload["tools"][0]["headers"]["Authorization"] == "Bearer PLACEHOLDER" -def test_compat() -> None: +def test_compat_responses_v03() -> None: # Check compatibility with v0.3 message format message_v03 = AIMessage( content=[ @@ -2434,6 +2439,258 @@ def test_compat() -> None: assert message_v03_output is not message_v03 +@pytest.mark.parametrize( + "message_v1, expected", + [ + ( + AIMessageV1( + [ + {"type": "reasoning", "reasoning": "Reasoning text"}, + { + "type": "tool_call", + "id": "call_123", + "name": "get_weather", + "args": {"location": "San Francisco"}, + }, + { + "type": "text", + "text": "Hello, world!", + "annotations": [ + {"type": "citation", "url": "https://example.com"} + ], + }, + ], + id="chatcmpl-123", + response_metadata={"model_provider": "openai", "model_name": "gpt-4.1"}, + ), + AIMessageV1( + [{"type": "text", "text": "Hello, world!"}], + id="chatcmpl-123", + response_metadata={"model_provider": "openai", "model_name": "gpt-4.1"}, + ), + ) + ], +) +def test_convert_from_v1_to_chat_completions( + message_v1: AIMessageV1, expected: AIMessageV1 +) -> None: + result = _convert_from_v1_to_chat_completions(message_v1) + assert result == expected + assert result.tool_calls == message_v1.tool_calls # tool calls remain cached + + # Check no mutation + assert message_v1 != result + + +@pytest.mark.parametrize( + "message_v1, expected", + [ + ( + AIMessageV1( + [ + {"type": "reasoning", "id": "abc123"}, + {"type": "reasoning", "id": "abc234", "reasoning": "foo "}, + {"type": "reasoning", "id": "abc234", 
"reasoning": "bar"}, + { + "type": "tool_call", + "id": "call_123", + "name": "get_weather", + "args": {"location": "San Francisco"}, + }, + { + "type": "tool_call", + "id": "call_234", + "name": "get_weather_2", + "args": {"location": "New York"}, + "extras": {"item_id": "fc_123"}, + }, + {"type": "text", "text": "Hello "}, + { + "type": "text", + "text": "world", + "annotations": [ + {"type": "citation", "url": "https://example.com"}, + { + "type": "citation", + "title": "my doc", + "extras": {"file_id": "file_123", "index": 1}, + }, + { + "type": "non_standard_annotation", + "value": {"bar": "baz"}, + }, + ], + }, + {"type": "image", "base64": "...", "id": "ig_123"}, + { + "type": "non_standard", + "value": {"type": "something_else", "foo": "bar"}, + }, + ], + id="resp123", + ), + [ + {"type": "reasoning", "id": "abc123", "summary": []}, + { + "type": "reasoning", + "id": "abc234", + "summary": [ + {"type": "summary_text", "text": "foo "}, + {"type": "summary_text", "text": "bar"}, + ], + }, + { + "type": "function_call", + "call_id": "call_123", + "name": "get_weather", + "arguments": '{"location": "San Francisco"}', + }, + { + "type": "function_call", + "call_id": "call_234", + "name": "get_weather_2", + "arguments": '{"location": "New York"}', + "id": "fc_123", + }, + {"type": "text", "text": "Hello "}, + { + "type": "text", + "text": "world", + "annotations": [ + {"type": "url_citation", "url": "https://example.com"}, + { + "type": "file_citation", + "filename": "my doc", + "index": 1, + "file_id": "file_123", + }, + {"bar": "baz"}, + ], + }, + {"type": "image_generation_call", "id": "ig_123", "result": "..."}, + {"type": "something_else", "foo": "bar"}, + ], + ) + ], +) +def test_convert_from_v1_to_responses( + message_v1: AIMessageV1, expected: AIMessageV1 +) -> None: + result = _convert_from_v1_to_responses(message_v1.content, message_v1.tool_calls) + assert result == expected + + # Check no mutation + assert message_v1 != result + + +@pytest.mark.parametrize( + "responses_content, tool_calls, expected_content", + [ + ( + [ + {"type": "reasoning", "id": "abc123", "summary": []}, + { + "type": "reasoning", + "id": "abc234", + "summary": [ + {"type": "summary_text", "text": "foo "}, + {"type": "summary_text", "text": "bar"}, + ], + }, + { + "type": "function_call", + "call_id": "call_123", + "name": "get_weather", + "arguments": '{"location": "San Francisco"}', + }, + { + "type": "function_call", + "call_id": "call_234", + "name": "get_weather_2", + "arguments": '{"location": "New York"}', + "id": "fc_123", + }, + {"type": "text", "text": "Hello "}, + { + "type": "text", + "text": "world", + "annotations": [ + {"type": "url_citation", "url": "https://example.com"}, + { + "type": "file_citation", + "filename": "my doc", + "index": 1, + "file_id": "file_123", + }, + {"bar": "baz"}, + ], + }, + {"type": "image_generation_call", "id": "ig_123", "result": "..."}, + {"type": "something_else", "foo": "bar"}, + ], + [ + { + "type": "tool_call", + "id": "call_123", + "name": "get_weather", + "args": {"location": "San Francisco"}, + }, + { + "type": "tool_call", + "id": "call_234", + "name": "get_weather_2", + "args": {"location": "New York"}, + }, + ], + [ + {"type": "reasoning", "id": "abc123"}, + {"type": "reasoning", "id": "abc234", "reasoning": "foo "}, + {"type": "reasoning", "id": "abc234", "reasoning": "bar"}, + { + "type": "tool_call", + "id": "call_123", + "name": "get_weather", + "args": {"location": "San Francisco"}, + }, + { + "type": "tool_call", + "id": "call_234", + "name": 
"get_weather_2", + "args": {"location": "New York"}, + "extras": {"item_id": "fc_123"}, + }, + {"type": "text", "text": "Hello "}, + { + "type": "text", + "text": "world", + "annotations": [ + {"type": "citation", "url": "https://example.com"}, + { + "type": "citation", + "title": "my doc", + "extras": {"file_id": "file_123", "index": 1}, + }, + {"type": "non_standard_annotation", "value": {"bar": "baz"}}, + ], + }, + {"type": "image", "base64": "...", "id": "ig_123"}, + { + "type": "non_standard", + "value": {"type": "something_else", "foo": "bar"}, + }, + ], + ) + ], +) +def test_convert_to_v1_from_responses( + responses_content: list[dict[str, Any]], + tool_calls: list[ToolCall], + expected_content: list[types.ContentBlock], +) -> None: + result = _convert_to_v1_from_responses(responses_content, tool_calls) + assert result == expected_content + + def test_get_last_messages() -> None: messages: list[BaseMessage] = [HumanMessage("Hello")] last_messages, previous_response_id = _get_last_messages(messages) diff --git a/libs/partners/openai/tests/unit_tests/chat_models/test_responses_stream.py b/libs/partners/openai/tests/unit_tests/chat_models/test_responses_stream.py index eca5ee1c2559a..49c88ab4aadeb 100644 --- a/libs/partners/openai/tests/unit_tests/chat_models/test_responses_stream.py +++ b/libs/partners/openai/tests/unit_tests/chat_models/test_responses_stream.py @@ -1,6 +1,7 @@ from typing import Any, Optional from unittest.mock import MagicMock, patch +import pytest from langchain_core.messages import AIMessageChunk, BaseMessageChunk from openai.types.responses import ( ResponseCompletedEvent, @@ -337,7 +338,7 @@ id="rs_234", summary=[], type="reasoning", - encrypted_content=None, + encrypted_content="encrypted-content", status=None, ), output_index=2, @@ -416,7 +417,7 @@ Summary(text="still more reasoning", type="summary_text"), ], type="reasoning", - encrypted_content=None, + encrypted_content="encrypted-content", status=None, ), output_index=2, @@ -562,7 +563,7 @@ Summary(text="still more reasoning", type="summary_text"), ], type="reasoning", - encrypted_content=None, + encrypted_content="encrypted-content", status=None, ), ResponseOutputMessage( @@ -620,8 +621,99 @@ def _strip_none(obj: Any) -> Any: return obj -def test_responses_stream() -> None: - llm = ChatOpenAI(model="o4-mini", output_version="responses/v1") +@pytest.mark.parametrize( + "output_version, expected_content", + [ + ( + "responses/v1", + [ + { + "id": "rs_123", + "summary": [ + { + "index": 0, + "type": "summary_text", + "text": "reasoning block one", + }, + { + "index": 1, + "type": "summary_text", + "text": "another reasoning block", + }, + ], + "type": "reasoning", + "index": 0, + }, + {"type": "text", "text": "text block one", "index": 1, "id": "msg_123"}, + { + "type": "text", + "text": "another text block", + "index": 2, + "id": "msg_123", + }, + { + "id": "rs_234", + "summary": [ + {"index": 0, "type": "summary_text", "text": "more reasoning"}, + { + "index": 1, + "type": "summary_text", + "text": "still more reasoning", + }, + ], + "encrypted_content": "encrypted-content", + "type": "reasoning", + "index": 3, + }, + {"type": "text", "text": "more", "index": 4, "id": "msg_234"}, + {"type": "text", "text": "text", "index": 5, "id": "msg_234"}, + ], + ), + ( + "v1", + [ + { + "type": "reasoning", + "reasoning": "reasoning block one", + "id": "rs_123", + "index": 0, + }, + { + "type": "reasoning", + "reasoning": "another reasoning block", + "id": "rs_123", + "index": 1, + }, + {"type": "text", "text": "text 
block one", "index": 2, "id": "msg_123"}, + { + "type": "text", + "text": "another text block", + "index": 3, + "id": "msg_123", + }, + { + "type": "reasoning", + "reasoning": "more reasoning", + "id": "rs_234", + "extras": {"encrypted_content": "encrypted-content"}, + "index": 4, + }, + { + "type": "reasoning", + "reasoning": "still more reasoning", + "id": "rs_234", + "index": 5, + }, + {"type": "text", "text": "more", "index": 6, "id": "msg_234"}, + {"type": "text", "text": "text", "index": 7, "id": "msg_234"}, + ], + ), + ], +) +def test_responses_stream(output_version: str, expected_content: list[dict]) -> None: + llm = ChatOpenAI( + model="o4-mini", use_responses_api=True, output_version=output_version + ) mock_client = MagicMock() def mock_create(*args: Any, **kwargs: Any) -> MockSyncContextManager: @@ -630,36 +722,14 @@ def mock_create(*args: Any, **kwargs: Any) -> MockSyncContextManager: mock_client.responses.create = mock_create full: Optional[BaseMessageChunk] = None + chunks = [] with patch.object(llm, "root_client", mock_client): for chunk in llm.stream("test"): assert isinstance(chunk, AIMessageChunk) full = chunk if full is None else full + chunk + chunks.append(chunk) assert isinstance(full, AIMessageChunk) - expected_content = [ - { - "id": "rs_123", - "summary": [ - {"index": 0, "type": "summary_text", "text": "reasoning block one"}, - {"index": 1, "type": "summary_text", "text": "another reasoning block"}, - ], - "type": "reasoning", - "index": 0, - }, - {"type": "text", "text": "text block one", "index": 1, "id": "msg_123"}, - {"type": "text", "text": "another text block", "index": 2, "id": "msg_123"}, - { - "id": "rs_234", - "summary": [ - {"index": 0, "type": "summary_text", "text": "more reasoning"}, - {"index": 1, "type": "summary_text", "text": "still more reasoning"}, - ], - "type": "reasoning", - "index": 3, - }, - {"type": "text", "text": "more", "index": 4, "id": "msg_234"}, - {"type": "text", "text": "text", "index": 5, "id": "msg_234"}, - ] assert full.content == expected_content assert full.additional_kwargs == {} assert full.id == "resp_123" From c0e4361192a07f6f349707d2acf3b809acc71ff9 Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Tue, 12 Aug 2025 18:03:19 -0400 Subject: [PATCH 11/73] core: populate tool_calls when initializing AIMessage via content_blocks --- libs/core/langchain_core/messages/ai.py | 11 ++++++++++- .../core/tests/unit_tests/messages/test_ai.py | 19 ++++++++++++++++++- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index 9d4a1e01879df..b38a7fa1a4924 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -203,6 +203,13 @@ def __init__( ) -> None: """Specify content as a positional arg or content_blocks for typing support.""" if content_blocks is not None: + # If there are tool calls in content_blocks, but not in tool_calls, add them + content_tool_calls = [ + block for block in content_blocks if block.get("type") == "tool_call" + ] + if content_tool_calls and "tool_calls" not in kwargs: + kwargs["tool_calls"] = content_tool_calls + super().__init__( content=cast("Union[str, list[Union[str, dict]]]", content_blocks), **kwargs, @@ -273,7 +280,9 @@ def _backwards_compat_tool_calls(cls, values: dict) -> Any: # Ensure "type" is properly set on all tool call-like dicts. 
if tool_calls := values.get("tool_calls"): values["tool_calls"] = [ - create_tool_call(**{k: v for k, v in tc.items() if k != "type"}) + create_tool_call( + **{k: v for k, v in tc.items() if k not in ("type", "extras")} + ) for tc in tool_calls ] if invalid_tool_calls := values.get("invalid_tool_calls"): diff --git a/libs/core/tests/unit_tests/messages/test_ai.py b/libs/core/tests/unit_tests/messages/test_ai.py index 81981725c5005..a7225015c2cff 100644 --- a/libs/core/tests/unit_tests/messages/test_ai.py +++ b/libs/core/tests/unit_tests/messages/test_ai.py @@ -253,7 +253,7 @@ def test_content_blocks() -> None: "id": "abc_123", }, ] - missing_tool_call = { + missing_tool_call: types.ToolCall = { "type": "tool_call", "name": "bar", "args": {"c": "d"}, @@ -267,3 +267,20 @@ def test_content_blocks() -> None: ], ) assert message.content_blocks == [*standard_content, missing_tool_call] + + # Check we auto-populate tool_calls + standard_content = [ + {"type": "text", "text": "foo"}, + { + "type": "tool_call", + "name": "foo", + "args": {"a": "b"}, + "id": "abc_123", + }, + missing_tool_call, + ] + message = AIMessage(content_blocks=standard_content) + assert message.tool_calls == [ + {"type": "tool_call", "name": "foo", "args": {"a": "b"}, "id": "abc_123"}, + missing_tool_call, + ] From 5c961ca4f683dcc71a259bc17304f88fb8f0b915 Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Tue, 12 Aug 2025 18:10:20 -0400 Subject: [PATCH 12/73] update test_base --- .../openai/langchain_openai/chat_models/base.py | 4 ++-- .../tests/unit_tests/chat_models/test_base.py | 17 +++++++++-------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index 947073b0e6a29..552d45e41eec3 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -3695,7 +3695,7 @@ def _construct_responses_api_input(messages: Sequence[BaseMessage]) -> list: if isinstance(lc_msg, AIMessage): lc_msg = _convert_from_v03_ai_message(lc_msg) msg = _convert_message_to_dict(lc_msg) - if isinstance(lc_msg, AIMessage): + if isinstance(lc_msg, AIMessage) and isinstance(msg.get("content"), list): msg["content"] = _convert_from_v1_to_responses( msg["content"], lc_msg.tool_calls ) @@ -4278,7 +4278,7 @@ def _advance(output_idx: int, sub_idx: Optional[int] = None) -> None: return current_index, current_output_index, current_sub_index, None if output_version == "v1": - content = cast(AIMessageChunk, _convert_to_v1_from_responses(content)) + content = cast(list[dict], _convert_to_v1_from_responses(content)) for content_block in content: if ( isinstance(content_block, dict) diff --git a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py index 6713f8c967b7b..ee89dc47fb8a7 100644 --- a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py +++ b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py @@ -26,7 +26,6 @@ from langchain_core.runnables import RunnableLambda from langchain_core.tracers.base import BaseTracer from langchain_core.tracers.schemas import Run -from langchain_core.v1.messages import AIMessage as AIMessageV1 from openai.types.responses import ResponseOutputMessage, ResponseReasoningItem from openai.types.responses.response import IncompleteDetails, Response, ResponseUsage from openai.types.responses.response_error import ResponseError @@ -2443,7 
+2442,7 @@ def test_compat_responses_v03() -> None: "message_v1, expected", [ ( - AIMessageV1( + AIMessage( [ {"type": "reasoning", "reasoning": "Reasoning text"}, { @@ -2463,7 +2462,7 @@ def test_compat_responses_v03() -> None: id="chatcmpl-123", response_metadata={"model_provider": "openai", "model_name": "gpt-4.1"}, ), - AIMessageV1( + AIMessage( [{"type": "text", "text": "Hello, world!"}], id="chatcmpl-123", response_metadata={"model_provider": "openai", "model_name": "gpt-4.1"}, @@ -2472,7 +2471,7 @@ def test_compat_responses_v03() -> None: ], ) def test_convert_from_v1_to_chat_completions( - message_v1: AIMessageV1, expected: AIMessageV1 + message_v1: AIMessage, expected: AIMessage ) -> None: result = _convert_from_v1_to_chat_completions(message_v1) assert result == expected @@ -2486,8 +2485,8 @@ def test_convert_from_v1_to_chat_completions( "message_v1, expected", [ ( - AIMessageV1( - [ + AIMessage( + content_blocks=[ {"type": "reasoning", "id": "abc123"}, {"type": "reasoning", "id": "abc234", "reasoning": "foo "}, {"type": "reasoning", "id": "abc234", "reasoning": "bar"}, @@ -2574,9 +2573,11 @@ def test_convert_from_v1_to_chat_completions( ], ) def test_convert_from_v1_to_responses( - message_v1: AIMessageV1, expected: AIMessageV1 + message_v1: AIMessage, expected: list[dict[str, Any]] ) -> None: - result = _convert_from_v1_to_responses(message_v1.content, message_v1.tool_calls) + result = _convert_from_v1_to_responses( + message_v1.content_blocks, message_v1.tool_calls + ) assert result == expected # Check no mutation From 0c7294f608df054081852ae74db90b1c140cb94c Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Wed, 13 Aug 2025 10:08:37 -0400 Subject: [PATCH 13/73] openai: pull in responses api integration tests from 0.4 branch --- .../cassettes/test_function_calling.yaml.gz | Bin 0 -> 7912 bytes .../test_parsed_pydantic_schema.yaml.gz | Bin 0 -> 4616 bytes .../tests/cassettes/test_web_search.yaml.gz | Bin 24336 -> 27998 bytes .../chat_models/test_responses_api.py | 628 +++++++++++++++--- 4 files changed, 543 insertions(+), 85 deletions(-) create mode 100644 libs/partners/openai/tests/cassettes/test_function_calling.yaml.gz create mode 100644 libs/partners/openai/tests/cassettes/test_parsed_pydantic_schema.yaml.gz diff --git a/libs/partners/openai/tests/cassettes/test_function_calling.yaml.gz b/libs/partners/openai/tests/cassettes/test_function_calling.yaml.gz new file mode 100644 index 0000000000000000000000000000000000000000..197a8402cf6ebaf57e31e7d5600363e77502e1c7 GIT binary patch literal 7912 zcmV%O&o{ibqo`^HHrJ|_H%KY-n%&O|Q+0X54y;=Y99}mCSOZWK4 zFaPS$}$p*&v(R;3~I$HQG;7gu()2>Q)l;&ywXsSCj% zvV6O%hWJddrP|s#rZ0B8;huFIS)v`O!c?zq(+=@gJ8EyzFGbPr4p&a+P^HiD*2Jvz zRt`S1rN>|p^p2J>6rJeNn>Fh{(^ z4B~Y#!?Yn1VP-T!n#okJ3YPAMPSC5NqiXuxuoAVO^>H)Ny93P5S%8FaP-U*WpvE-W)%*e*N_yr3MeP*jc(k?A>a)-T2G5 zAenB0WqhNZvy9`J^KqliEZ#hXv)J4CcQo7_SF;<<{N?)AEVt{~+r!jDM>iTB9@d*_ z5Qnq%`eFL!`1dwlA7kgvw&C=qh|5iv>pL5+rp^LWd)Q&oZwtNr)Z1+Hc6@M_+q*^b zhNeL@`&{b}M-|PWRqh|Z{5cqn@#pp}_~X3MzYRXc!=tND*0A!OEgc9c>@p%)v9>iU zDcrCQ@$lBv#nh?|ZS8i#y`AFD!ZOsk6NP(xwzC5o6%4tn9qpj;C^whhY$W4F4c{Sr zGlpSm3~`D@u=<_T(T;dHB$S*Hl$_yD0X>*zykO52NE)UH3VFp$76vuyg^r=EoJd>H z)a2L;7_f!(UEcA|j8>spd123#r=NBxX@OBuL%`~M+OF-Lefj7ATYPes z%SG@vfc*_%_X1c*0QTa!Qah15PoC4MB~xR#a-)f7M_R=>HWIf}JI+Rp2*9>U0jxU& znzgHc9tk^oJ3F;l(McHyv5bu^4MUq=5V?D|DG2sa+n!RQ;|1)xE2 zjow!7dcf!gV{J|cv<8R?F&r~s1BBG}AZB<@%gC9>z%0d^5~Af`HX>5>K3k*j@$!SK zhnVgQX2sZD<^%wh22DBdxE4$!ZSFSd!9#ia6-)2ELbkwYb!k)@V0RYiq65z&*8T>% 
zzk%-m9niha)~jW_o)tkQ>ukMwBaJ@CKOo?K>B-IBiozk8g-7b%NX$nydXdz~Ow2?P z1}*Pkj(a}n(xaJxIL#eFork`fcn77mNR;tQqPAoy>rxqOfuhS51sK>2>Rw^L#Vc&7 z3RvUctAu#~hv6##7j|7gsLAB0wCa1_<@_b-*V#ycxov=!Ld^)THwZTgtQG*TXSs>b zqV{|$xriRMEt^#*i)qv6!>mgO=d>r_R%v(H8nZcAOhf|m-W9bvXzcTR{0b z8+_qAn!VWRzHHRakBzN|+v!MvykrpPdZj;pdgQ`@5Co^I3G~XWOP#HF-J4}Uso7L# zVh^lHeO2e4WVKAmn!kuxRjhv7SEAlxFs@7LSPPZ0C#cd(MIGnMGZ-rewWSEk+&rkd zd@$!ul8qpJMUL3@5=F13lWk^Nz}ohbtjiBpIc*-bk5)z7K&AqdeY5ww(d<1M5Rf`L zIC301n7EL2`8qmz(;TjKZAjsQWCI=a?atv3$hpPA z^&^77Yq4xum*nKxYId~9q)GEtj8(zLNNEHJ+C(dxIF@ch;gZSnKTKGM{=Ckf;iE*f zH0zQcHh$+LqY-0d0-q4q*PI$OdxB;7g6yT=y}saQS6F9VrX%8p1ww{xAP;& zCYYzX=!r!v40D1^Tbb;$-`BB$ULno}QZAta>~xbYdvk13@-_t?5HH5W^a*oIW<(@j1H6{&Oi-zjmYIh z#fbvIv6A1OO4^oL8@a$-xzfmz8)Ul%l-Yzp?^9M$UKWf7C>2_H+jJJ^%9X=LO?116 zT~C`I2Dwao7tF%|eSl|Ou}HGO*+SKO#KaQ#=9L|}Cxk`G-d@-~$#v-4r>{@c@q#&z z)8$D6;0xYEf*4{6p4ez!Use}ETeJNztebyaW6;eaxO(Sn7A=h_TDsPtHQikQVl;aL z6MyHc%|kPdudaPPew)VYSA-MSY8Fp}hvl0$jd?_SquuIl=^$po+#aI!t%f|9xz>cj5eI=RiEFid z!yIQ0qM6O{kL#h*8k4J>15;Pu@b}KutK7p-dXH!J zz^>taL8qgXmEQ_NGxVg_5C;*>f{{~1jc!x#41Qo5{wt6E%1(-RaV_ZNL&!Z$TAI01 zmDxZ5N3|*J)uRB9!{nHHBg&+T0xz7bZ9Gq}$wygwTVBk$73JhJDtRWS$UYL8vLb{W zLaAxu>LLWdD#GkDmT!aya-(o=X*Eh@k~azvx(GH))uv%fu{?~{QBBy=Maq5B3Ll9u zDWk`IpNAck3yrLugFxRsD^6qS9w}p)2cKCE7ZKmIQ(Ku5OJMr&&O6ez`|`V`JnqMt z-1qA8*D}00Kf9jj{Ys9vH1TVx-dkDTewku3EzwvVo|7s5$h(zzwxYZOHiy!%ekd!teh26M(`FL`W)m6qCpohC~t81OA=Qj9XIy>h(F z2TuyoUI6HPn)s_B)|%c5@&vJOExj_5pMhMC{5+-jjsv)YjS20>0K+7sgrSN7%PY*E z)*OmCY!D(dV`GVrwA=O%Y0$D3>fDOdjx|@uK>BeQnsaxqCcT9@Hy;OMYcXia2Q{fn z$~+9r@q%Y52d$xRj@7xDu#x1EANMglLwS*%gfmgcGSD9Y*m_~m!5sx9H;$>5P*{f* zq^>9xc9E5|fQ=9ex`z=)wPOjr0B~wPaig)O+Vcrw2x~~D#wy(rqHBw(p=~mpyhxf{ z?5!XhZxAX!PIb6^i~_HiPmN)cp2B zjd`6rhdsJx`BRyB$LbvVS!{ZtEM;#|&SDf9jC_e-s?}F4sS}gr`T<^ z#Tg!-_&L#w-}m1aqeo!lL@vTF(p!a>vxxd*B|i-8KI$&t-9>o$bi?f{Y$ZXsU+-By zzC0hSV%f)ES01>yE|H$x#%JO_0I)qb;g+bjGzc0cN>D90d05MeNaZ>dx(3dgFDu5S ziaK5yxIW+AD5bO%EpvlB3z{hu1rmj}#udq$mjY!`)q8Ualqo{C#_k+Tg|5L%^(QH} zo?&z&P|oe$hRkR)E_uRCOK4V)WuX-l|FZ1Pg?3I2z974Ep=DjFqp!>ETxh(@&&iGo zl>vv`bFAg=thD<&qfxJfDjZJwnJRw_!IoGk(TCmLfMkj2D#16!>Ur?IJgXy5L^GmY z1W#3xnF^tp`Vcm(h341_t&X`@=W^AIo~n38F!iUzl$4~Dfywh2Qb=e*C9FN;LFwxL zCeJ7D%I;ifyvuLM?p$a&HTab5DrMQ73yrJ#H)MA%v|@QqWfvSA8R7a@oJH=(Q;`2F zoW-f{x#29lyPXAJo;zIrz0RVf>a$Vr%Z|1zw47~x&(W5JmZSQ)>=vJLv}K`jaQmjC zHCVPabr!`ag}=&K6fF4oS2+tFq)>KxT>rBC%7V*R{PXfto7a_&kMfhccT4H`@_bVF zt|WC19PqL(ui@TT0goHx72LZ3yjZx)S>&U<>KQ3*_i3=>zUM-AwXJo7q$p$gU4`QY z`GxrAJ%!6@7T3G8T~_hNkz=0-i`Z}PgPFERyJ77HW9GSy{4GSaM%4vHz4ye6r-34q z{Yi*_UEh*h@#3{WG0zNsEnd79D86Ss7xALH) zC&j|kNxrM@FYA+1T$cDPIoD&^4{ubd2?s6v9l=($xip&R+`d;KEtf`1j^aBN(sF5(`|;0KNEscdo^RDp)Yen6BjRk= zpO_+a2F<@;-NH9-)cRNU;uY?Uw(JELdo|9xj%gPPJSc`>i5K0oy?6z`EPbtr16G7d z%f7eky~}_8oA1B-x7%R%e`5FC|I!Yz56%*a=G+SDEYV;rS5;k|tGZG(lc&l+HxcZ; zByjy;Z1tm6oGh`%YG8sf{IzF^#J|rH{p-&Xxs`HQ4<<2R>?y2QAX8N`1*PTf)lP4r zj{Q)b=gUKz_LNpHR9cNtkwBay<=wMHXGh&R3A{L9QKslrpXK}-BQ5WnH?t+UJj#&! 
z?^2iPTTf0e(dZD08reShxUqAjP?#E4h@}kOisFqW&?t%Goccj&)=AFE#c7E2%+P!1 z3AlW1Lyb$;^+=XvCd|4Xi*p9x){3C&+8m)$K!^0oMs{IcXOkr-;Z#!Zc;%cyiEDf3 z5p4OO6=N50`{(C2@)J)Nhg3=0J{eq`Jn`+Qx+6nAx1RQSxAkiWR8C(PYe%+2 z&lqyLaP>4qzGEM-P^#(3tx2L>oQ)}j<;0g6mY|*t#>)C?_c0h4;De29;SMH21C(C& zMzHtUsQ%bb6-boZ2QddX;9axf$crb>j4-ke*ut>^j07!hHg;Cf0Num{iSA2a82IT> zxih#Nb#eX$6sfmPm%|sZ9LXT~v2!bKr5`bZQk_b*goXE`T6QM>2OC*Gadv|yeovG= zg5^ASaTL4bP~OnSo&ID6?!DLynnL=>4( zUFd5p(7{1vRg>j^Q zg(VlN{e+EdryhSi;`c;J>Vn#u@UXK#AvG6C%{Qr<3yS$W0FBn4JtNC<>87xkVsK{pqr zngy2F;iFW7yJ6!%X|ij}LOKf_U6*sc&7LvEsN}9ol@QN~!g$(!@!XETa*mCPrE6~> zjZ3@Y;IkhdUbl0a1RDp;CY(?8_7?4UmouGEW$%IUf@(E3EM3$Tyh-Welrg(F(eyNjJA)pk}>Og}f6Leb4} z8;h=C8HvT&OYIHJ_K_(^sysVVh_7hEiH%<>#8)(-if;5d_*d-9o??7F|l-goszHArJD40 z7DH~P9m=!U$lh0`bO;?RS3b%g2W1Y&i9m1Aa^PIWWfDR^O3N=6`2cJ8Uwx_ZN zV)x&SPSxRlJXro5({65Eg$x@_Iwml!43|`Lvm_rYzx+wf?LhFJgVhUY)f*k6v!%xv ziuN=$D4thCC{~e*ZwstK-&+2B4jLoN$U=Tr6lnM$&cd`a@ie`BHQ;Jb`s&ydv0$Qlsud1n_V8_ zvqGu+-1%NeN>NAF7ORpTj%i1O`~(Q;?7d;8EG&P-R^P>H4>5zr z#L{!hI!*O)Ox_Nbu>#;jG01mAW}}8?M|7uvN~}(8gYH7h7%t@wyO+WzE0Vj~c1oKtMvdbB9!#u5 z6FXpZDpryTE)!^gNj4l?g!Bv#5ovUKRO4A=^~u6$CYY5<2XH*IbZC~W#^_P48?qTL zr-qUnSSm9n<0P{#pKV*vb3a8#FaUh)EDt|=!S3=)VFNlj*~(4dJ`YZek$=uUa$QW7 zP}n)e30Egx?AYPh(T_?$@$fq|t@R?{?+ry`eVs-lGCb^$z4RdMNNfP<5wwtJbv6J( z*nEDx3NqU`h6aAKfkC=EtK-s{o&R!Sy9Rg!bF>rtz8-Z^| z2=JiR{h-kU;`nGPxH|rZuF|TeK-TbKhzkVPjH)VgqO;mOM|D^SMrn>p;FWftS`E># z!fO81o@yPrLyANCc?eB?$VOgf!BQBbi-`Fikmu5fFEEh_gKWF2u4aUzQdO9Cm0a=i zqiUbmzxNa`-{Ssx{d-UGitSvt^zY~M0+WtHz(2sgmH0k)7u6Psjn$1QF-EOTcWlwG8rzb`n=>{zbDEE+zuF{>IwMzQ?CdpQJ z4vQcTzJ}6}-&UYpOBeoUsz0q^629Ao%Ny}gYX!K~(k30tHzlT%e&&3MMIg$M$+;2& zp-r2xm3%yTO{P3fMN8Oe&ZwPlaR)ZiBXISM-(&~6JoxPFaA$+40%IKf7M*@jU+R^j z$k307AgcUkoD0twpEp(TH{2I|%WPDOZMZPbF_}U|iKvs?&9=82aowbL9_IrrvCfy!K_3Zq&M=L)mSpu2BS$E!0 zsxc?>+*Y546=jv$dD<-;NgnCK@aRY?J7SvJHbq>XGOmNbRQ+s=9F}Y-VcF(sesj2U zX$N?|bQq2>?B-n}AVFm1ri!$s{S|bal6($ix*(;ZmBuo{<`uH(IozDj!dAV#28*84 zckoRd+~5)UK}`+|N9Gl!bn+=Da;r=uq$1i)_rWMz=2`VN3tMln;Lk?5JhUyC+qWZJ z9@-Xh&$lC79@^#^>yIPc=TXRXk+KH}nY_Xr8q3u7(=69Jew-_d#rFylG7)(pKh)gQ za!FI-qmjrN^PKHPFnUsPBS5y=mmZJjJuXuKjRo7A4m!(d171akm-CcpZ)GA};wns6 zQ`qfxO@+JA+S(6E&c67hHbzX3O#&5N8)$e)VR{+Ut%L0NgF1Y;a&6Y(S@?u}=S8ulDU0%81@~byno?bF~a>3C{(NQ%w3Qe$$&dDyciRodrbNwNxdR}byx9C#-d&4fRD|bi|Lv8(ch~-XedXWR SSAKVM^8WyoQmuSQZvX)1?X4^T literal 0 HcmV?d00001 diff --git a/libs/partners/openai/tests/cassettes/test_parsed_pydantic_schema.yaml.gz b/libs/partners/openai/tests/cassettes/test_parsed_pydantic_schema.yaml.gz new file mode 100644 index 0000000000000000000000000000000000000000..13c0b8896decc64a0df2d5181abd8873694b343d GIT binary patch literal 4616 zcmV+j68G&NiwFR`0B~pm|Lt5`kE1%aexF~_^EA?yN&}>__ok(J$fZI^7i4lNZf^mS zKmsH?kWfVPMX#%eTI@Y_psHwV%gH{QYlF-|UT>egF3N zzuSH^+hyPW^S5vCfAegxwWBugcI4mnM(mq{xOXLOkN%cJ@8D)_&k5g_bN$7C?F=$o zcn{>&#&-=haQuO2Oj6(0#kHlUJ|1xB4u63#MXoTY$C^Uq;gKEdG~v0!j{Z5`|kw;}C+hZUwyD5jP%;P8MmT|4G)8U#+GeXt`nDBiqvrF?mBp>}j6 zaSt;^%jV`r8~^S!#@S47Msmc9NE~=T%joelV-Q$KhS?yq3^lcd=FyrAT-14HjN|I; z=n@ZRxDCw42))F!r12IH=t{u>mK4AB2M0&eGWa`iG9o)oTo}9v4ZZoyo$o-mcvX%w z-zNEgKYjbhZ@-OSx~y`qA=Q%YEW)cK&ge_?zfTJ7*I`bLV!W zWF93?!8}?f-W?5-Y&*Zw%-h7*R=JPoyQkR_2D;Md{V7gneiY2(_-VFtynCC*S>)W= zHke%%G23(#-`Q|GbJno5rvoVcy3pBPyLqz9o}A78ZjtPusUOZi*81kG!Z}!#`~BO$ zhm$G%x!?KU>y5q&MiE$nmpr4Fzyw0OF-S}!$1ArVq}ZXu?GXvkc)N+>dC9NvU~#fT7G1|NK(chR zlskhhVhZptW(vm9d{&RGRFMXr8os1zclkclRnH8S)at%u*2;WVrsliabpxgAWav3K zURVsN2%4`4CzDw zA3l2AO2!T=#~PG95DD~lm@tnt=zUi^!i&2%hw-!{~U)xNoUYn0ko1M@g)TDrDU zkHK2)dMl!F#t2Tdk6%IgDiR7nz3N$r$F|BTfIL07uN{ja^QP*-Hrlaej1iM%hg*RG 
zfx$HK!t`<_OSvEap#NRGnwUDBfyldk2;M!BvkZ?bWW{#z?67%2hK96@)!Iv3X0}9@ z_|NeDU^O6G3InWJo#&XD=8Gd?8;1VFU|m2<{b7*$lZl&f@%|A0N=~b|2KLi@ZE(%XY&dvo!U=0d+@@{>`WU zVj?pOZi{rbJ$B}lwZIB{M@I;d=Mhtf#pFZuIrz6-$Yzk52FMP)#|GKjf*2cuivGre zDQE_^O9y7|h5<}VT@de~#6nx-_E<4}1QAD&8730&?2lrmzNK4;3URDgvFbsPq4U6$ z22I)o4cRXi3zWl&tgQ*J#aWGy)dbYc^5FHvI*3DpnI1s=Xa<}UX#`ck7I~DUw&9i$Y z;l^CsZ-=J-ezZm#>Zmom1c9g=gO}LBiso1@)Wk__@ea_1_%!cIfBM9Jk+Tw@D~6MouUjYlZBLdZf9 zaVBUv{B}nvU3~Gp7?L*QA)PYD-b~NYh7E5JRFR3Vt3$Sa)U1X0I0>57=JK(eK^Z`7 zAql{6s#Jk7f1)0+G1Gu>29vjV(k+)|{&q&Ype}&gxcYkYh}tbCEVvc<+aNG-zlBZ6d zxtx1E-OZwS0g1^A$L4e{Bgvmkkj56;|LIo=#m^kjd&e=Tb} z*JQJuM>GFvvs=z0k}F!6k0J$~8c?svrV6!QYW;lL089P|MO3c%=-3QkV8PEtP~lCC3S^@OHORZr2C<4}L& zhuvOibz9Fvec(gWzzXGt8V*858n1_v>MOn+SJJ+y66p!h(uwy(4>f~~k5^VN4W$bS zW|q>N04*4T{{lrItAU16P&o+Vr@Di`4=!+F8V9#|*qo$3@G%IfvOf%Sbi9_{6^seJ z29>LLvr*K&a48K{=W{aJK{YpTaV|uVA9gO#O;Kt#0+t;#esj?@`pjG$CcPMgnAW@Yl6 z;@?qDhDwm31yq~&ih3tC{aFNYG!RLq2&^Ws^ExXK0CnYrK}x6xRmn$oWBwoZV{Y%r zknRj)yhsn}kjSp`;s#bj!3c?r^J?n}^ho%9Eo}!#7Gt#DFi=;kt{e;jX#EwEI87l$YU7E zf+ML?K8TSlI2J=bijgchmUj6NMzY{Y!0j=Nq{U_s3R$>4X`-F8z-ryZ%1U?IuTx?s z8M4kR(H0}|cL8!-Nqa7Y0@k19*%_-98 z`67mvD@kA_xSl}RhJO^iB^ZTsf2 zy;7$mGNhuv9EVf+*#x*y!k%Y_FHN=EUJs!u#AshVTE4YvudS}Pf;LEnCCC9MvWz9D zuN9-sCR#nu{kZxVe(U3kT7?|KaIr18C_4Ix;OKnSHcvJ;9=FZ1@EP1^o90d1B&pN` zFj2S7k_@hH+GddNg5jZRe$V19LG=pkR`>E=3pI5@94%2PwmcgXo%SPkS?i7ihoHT? zI*Q-g0s?UYi$|=jK|)$i5PyC+D^~JgCykiL=yzfTpxwbh;C>0aTz+-C z^xJ;{72|#hyKH`SyOft0^>4(VGD%5)R{SBdWV>I9KgzWA_3e@#4Y+2TG@R9`>gR2f z$ns40E4KNrT#4!Q3`rTpM{wuqbRWP9?Zl?etrob zS36(b$BUy=7j2U)oJlJmwawEL1^3z}f5|qj#TdHo-HGNh9lie|F3eSb%i5Q=61d$_Zf~< zNA>&4qAW*h+Kp-)Vvdm4Q}xfrq5D((D{)9p**H{^!ISjzvvG)?h&W9z?~FrU-Oq`} zYiar;eqP(?`h=fn^&F@?3Ud5eeqQ=``YAtuK{wZa2_Ij{U*E@9rvChE`1tY5`#9u5 zzlL3ozP?><;J;HAGfCCpOlkA8n8_S_egrewy&$5VD_cSz#muticK9@A0?!1;q`aLL zBF>kJKO;}F@C&8l56QF0gNPB`*kb#LJc$wA*kZdSPjo-&7m;Q5)n&P0Fq7`-Uf%Y- z1bXm$@Od9U*m=d}QtWyDnz4u8*+To~%nFT?xfvb2)wY~P8l*;TPwu18xUb@nsOVdz~GqkK9Qs#E=^-hw5U&@YeabRRMgN%>XJfp`aIVh1do zwZb#2S7*;3l@So%;ec(3d+QoEI<|1pv07fPI9-0|3?DcA5SG$ykDEqQo*~p>CHd() zeEt#$A&)Ow6?cv)87z*!QBN2lhr4*P>DrMdVwYD=HqCkV=&|NNS4Up2BYRTpL`yHr z;sBdxPf~ipH_|xy1vlGR#vlM=*;}19IEowC>&HNe{L6y@xf+;{_@Iyad?Y}S$UtTJcz__0ffjWh4-h0W z6ho2#aqZYEeg>i61g*mm=o=OXyC<&D-)ME@+d(B)r}z3#F7y+_V(-Fc80|ro)&U1* z+D0XT!q$0++3VubyO?vyfq;%`8x3^QTSjtaFYNpDmOee?54H-BQwCkmML4VoSPC)Zx; yL!Y?{PAMyW3y09fuDsoR$FjZWZou-(U-$lU_ql(G%i51$Nd7;Su*H6VQvd*`4k)+) literal 0 HcmV?d00001 diff --git a/libs/partners/openai/tests/cassettes/test_web_search.yaml.gz b/libs/partners/openai/tests/cassettes/test_web_search.yaml.gz index e99f1c2e13a14928c0b2b025eab331ac4ec326f3..a202dfe9c614179cbb87d0bb5321229a94be9c85 100644 GIT binary patch literal 27998 zcmV*cKvTaTiwFR#z=vo8|LncnlIlpZCVW3n(fewf^>51tL{hzIvwguCkw_G0q&tu@ zIY~ezkRaJS{J2LbLLnhQ9Hy4k|7x@9%E}Z$$iv;k51+e#ef)L!*zI@!{$D@-G2NKX zzyI-H|218#CtuD#{?GsV2mJ5DX>OwXg zy*e}M1$Q#;ZcTeIo7%&i;gnpnUT{8oCEKtId$OS1=zi<SD8!?}?CRa%;KDyb(d~iE4R`6;+zTwcX6J7k&*T??L`C)7hu}Sg;k1p(w`<}kJ zEBgbu%KY~~{%rU9@SlgT#lOQ3rBIMFly}!d3qF*YN`;-_&|R7h>!|l&fwgIAR>6f2 zIGQy7G#X9Iv@OeYnuYN=^FRN86=Y+xSuK7``fo{(E9q-0>Ds9IZQ{RsrGq{)=VslK z^j_(EY7Z75gU(bsS@LqTAPbsf)iM?!XmHC;M}?I>I-;bnjYem$)_?A&BkM5fp%$RG z>ohlX$GN6uPOaB0#qHoOT@$o`X9HvwZCjsD8|rTr@>_-cRw3y! z7BV~hLY?X-ckF6!?TWcnDsxNktMKpHsXfx?nl0y5cXlERs!I#hz$~Lw6i2D3b8S@3 zYp!mqu94A316M7>7umHXD7NqELjD83;AD=5uq*tM(zhX5naNnUk?7F%c+#l=%aiFY@;dcoZ3ep;}X%ypk6aZW0)D|y4FX7Xu^y>o@elH zov|{TO4_Wq9_~JRMWR&H1!kI+6vaId$tP{uStLYg#WHrz1G! 
zhBBMfYxw(x4*xBKri4>=@b%i(=`gOp?d9_3ugGsLeavoEUYCzxS79CBwUO;kuPbEF$(XiwqbCVA>hD%r&PA+>yLIZoi zeW>i@deZ{Qu#pT?+n8x~Au}nL7Usy>>2>R%JEd<;QnOSY*K6befIcf&cmoT%<`{0| z-0`r+WC+8f~-F#%ixD zwLRgnej#htv*2tOV27&p=csT5CZ11vtL?a+H>Qis?0vD7Cu21;%>g}~QlB1*WDy%e z$AeguGL!mXHn8E4#`0_+4S{BqZMt^B@kj!+gzLWy&yKQ|+)&{e)sidKJXug#0@3M% zJrr_dQR?gX1wHZw9Su$sPMz=`7$x5Zs5WFLv;GtQnZtsmtY)8ZhqiahGAIXgI~`^5 z1UX-kxB}=#ZFHOgC%`Gww}8cu@Ff{w7aMhCvr;{%i_&5OEJiJY5%5u6-GNd#nB(G1 zAL%yG1U^tZfE3^YE)l&tRkG9iaIKBhJXw&wnfDe&hrV(}&dAlm?gMylGE&{vqPQ8s z&ZZ5EoFdW8pweIm`IfDIPluV!q|pUMqWsZv3cwG>0kjT$p0iE&ppQ)#wDuNG8ChuG z0)x)Tb-pL-ArLCkg(LiX5AwQif#~VZ&s@usUDzc! z_u5`qbnhwH`Iak)e(??d$pK4bTm5Q=|Ln$66b(v4I0#J|%(`RL(9Ip}W`i^uHq39i znuSv&a^1%J1x^}oHnFS*C|{B-YXB$%z%>u9q1ghN~H+OqLxL*mFxR0T&sJ1+y%ONS{?XuYUjyS z03OWTlulf!&{G~0Q^bL9wRFi^!cM-C&+yhGrioKpfaoC`p{3x00(&`oe zdSBt&pyU+3fgFYm|4n6iy_$7wFhz!7Il$q_)moO&bOdU>M_8kzue*3P%;Db1D&=e* zEi~Yc1-u=oyTK0S9jytfzUmgxs*oY}iCtTO2;w2^$7Pv_UuQf-e{?Ew6 zj!STLWWfL+MiVtSVMpQ9+`=ondDF)TKUFT^;O2&_X%jkd0jF zWIRa;5I0Xu5W=h`8bS-o1*jXhKwA<_@Mw4f1u}H#9Yn1R?@kd+jqLda#OxMkmf%Iz zl4@Xh0`2QVw?!7~?2)d_6bbOtgqXb#Q=(nc&yJVz6u9 zaT-$FW?CyOVFw-^(Ip!?G^%{RMa%h6G;8m-XsP=KKdhi~A5y(w-Hb_kah zyt45794v^=BzRmjv^K04R@!*A20AuyLvodDh+g!c< z8&$xBX7D@wTa-yhPkBUW^AeNJ3ko)uf=LHH@hQMtgni(H;DHif84lqSbRjsJvqgJZ zJ%Xdo$w9RZY+v5z$>sa+wmtNe42>0y00_NKi%(o?)a`K7&{N5|>j8U@SHH$Q8j0y%QzCnaB>RkcRf-cgt-Ct!Q^<{tdrXp+A zt;|$-<$!<>$*xLc+!dH1?g8;9&z;tbQVTvRAjhNEo54QO0JzF)(;e zvkQ5{352QQ(HGQ%>EuKAGP!#(TmXy?ECe^+4!-%DD$^`c9<4DE!#x#4t{^0)Bs}Hd zJAVNsG0c)I0$i7r5AB)@*DKNyV~ac4rkR52SY5L)pl2Vhj=J4lsw5Vuz3F|^R+j@; z?XQlCtx0NKyATGX2Pg(GDP!N62)UL6p#jN2B1Fv?Xk)`G>UM2spfS1b0&Pz&3+Zuf zE)5qzDsVrV%So+pgwIJ{h+vq*JnCoXe}3mCe&;5B=O%t5l7372Z%O}8l=QEU-FCCy zefTg$ve<6_g&ZAL0H6Yno0(kpGKO$T>tppOW(}25ZK=7`Qc(Mvq*R8MTCc6tzVfEJ zD_67>yd}lFVR=!U$#!8ev4=}j_zDtS>9N17DD)Y9)cpjF19DwFsV;opm2=*bO27Kn zDw3U|fs--E3O*Lj7QV~nB1V*!6Un4m9{kF>2WFt3x4b0{P*27J|D4ZG>ql=V)%wNoN&Zg_(Zd#^0o{nm7 z;~cSRXi5_lowpXZj zLiUB3Q$N)u7~Ya{Y}bdDg8!~m^I&-`wB;%XKTGWqqDeYO#g&ijc%<4~>sw3J)^e)b zo$Gxi-U~lE%#*r*IrP?35CN?p*VZ0AEmeBP15H z3xy@=w=%xuyQ^hM#snS>=d&-QvftT~-V&c3`pus`??oCr|K+J6j@;ntS&r7pD|LOP zxgCnusSg;d>q{-GxP!b>X@XhawUy-s(2hU^?tTM@UcEBYsX_R)~DzbIzZ<=GAeV)?#5 zTM|x^gz}1;S;X>wSfG5{Q)NrWbXcQ&)$q>67iV#o^0}*ekC*B#dCOV7lVCK&nhp@^qkse(w%E*_V+Wcbt+xB*q?7|lptED7(SsQ9SA%e%v*mm;Lz zl3t)RdRH{yJQHK^*J7jW#P3ZIUWtyfC8L6*v+yaG3g_Zb`qx!WBwYXJ5%)-~SJc}oTIsvCfyLk6dK4mGT*{$@Pn3-dI zd4GcLqCOC;jYm>*QCT_0>USmQI0 zXJ|`r$y=byd+?UbJG%k9ya#c~?w%XK%X=`F?4q~Tmd&!oJAr*1OflJSI5avDjOC~)xV3_wHFd3`g0Ak*Q!DKIf1B!VM3X{F~ z4KU_CI81iw-hgA?leTBZ;0-|LJy=YBJl7$a_nXWoO$WG8+Do_P;4(>DgM0yOWzW^%r~4$-^^o$23@S3#Qh z;4>qw;B%PfJqS(q;x~Yr_n=f56WxGn-hS?n)e_zxhA>}*t`d; zDHt%l#3pC;>%h%>Fq>RoUWabpgWBY2kL%#gdvKfF z7`zVOjE&r!oyUS38SbbAoEDz0E-p0ShzU`}`3#GH5WlBJyx)-mRWzOvF=1YxEYfN0Veo;4&B%zVz{G5y}T)-poaI z2YrfvV^Np|Dn&m|lnG^=sDq4VftU6mf5Ro2JHJ-vx;@N%;0`epP{-5fOcYLFQE9N2 zu*(cSM%`8rPP+z%HA^luNpOO={te8X4T7}nS|Hb;61<`ijQRB5yvQrB%JYMcwhr8o zos%4e7-?8{^W5R{LQXUf@$R2Ki(t{}=CP!1rf~OoL5Vq7ci$|3#&8E`ISMi0C5AjN z6-J9OPg?!R1rqBRNy6m~P8;}cYkBD+3)F9R8i*BBD8*PheWM)}PJ9K!CH}7L+7Wf% z2iQ7z(S`yK3yi>!Voou48FKMK3z;Ad#(G6)IvgsYU=8x3NrE^*9u?wkpt3(~o-Nh= zZ+t(?qY1)>XfLBy2u2esMe%P(F*zeJ4G7>tq!aKY+}o%&{B*H7rnRD?^uWEvW5J=k zt>RYwYyhT#Y4zy$1a3}{?G@NR=#aRfd?e7g#tay+a##E#^kv>du{5zTRd{aTC_J)F z^K22j97|n`=truBL_QVNg&BOSqMyGEF%f3!rnLe{%HATtcalX>L@A>1p0lJj=FTz8 zR~4k^U!R@r88D|@`_V0G|BgaQlzjlrg+CKSOdk<3Q4drm`{+OPKEA#`zApS}|D#MX zG*N%-G11+8xJnwc&`k?#-U@k@?j5_*JgM;UTg5fUj1Ym|S9$M(CjwQGeMzHik{zJif2mhms z_Il?bRl+!>RYvcx+;(Ng@*3??=S^Q)bG@!F^@^5LM@mLn&gV)+%eF@v>~P?yb;VI6 
zb)_QASi~r;{Ws$+2+#}hHeM?bt3&AeIoQPjE)cx?zje~OMM3y8B2~WN zzwsJf@%w6PGlZi~!lUw{ZPQEm8WzdZv-maWevz6s!8?X!=>=SF!hJvgjJizN5>ePA zwgrCoXTdQUlA;(Lyx&&8``sduLoc07b4aH)L7f+};5k!7X*RP9o5*~Z!ndudF?hY@ z4SyrF0ruco~F&QDS zR5%{$*nopLgqzeN{0ep!cq~6@KON81#pA)_<*DT%oaoRYFdTb;3F$tF8w{}0l z0sMpY)?hZNydI*%{W-0JhvKF00z3AMP6m1KZBK4*G9Cmr&!WdiY*1_1C%dtSYihRf z~QOqGb#`3cgbl1@TBa80AWz1_~@%hUZq{LT42^7_I`panzw@9#U!s22OjuFTl~fmu+-u^BJ&ocr=lt9f21uBza*?l-hB-0)ouZC=w9O&c)}Q z+xoKiTVMXxm%sJpZ+-cnp)Wa-6hrybULLd)sFUJNy~#i4XRG~>m*w3>{RwpBvhQP$ zkO*63M4;`zxQx^4x&UChKb-58+L8dKLA5Fs<4s+bmddERYS%GS*g2}pL0(^W=W1Wh zXs+yPQip~9f_1b}jjmWhs)yhOg*VPoPyvRpX_a~akd7OUY`Qhh7Rs}&W!l4oS=r1) z`#cJt-SYFm;y0FUa+(-_6Lv1R1(9#zUu%B0+NE`u7mmfbek`mfBAx|Sv#P@a$ai)@bnCLiFA%0L z#R^)y@sRadm^si8Zk5$Wof9qXKoEK{*RE(s&8@BU${bY8fW3IM3DCTgfj~KqOFRFf zh$F|5G>&W#N7|@hQ@R7*FQ9SunoUAd*_HrE_iMb1Xf7bg<;62kbw z-UVY0as$H4)9`dtgohElhluO!OYpOV#3`N>f>Dq;B4wQ|M4evrv4Nxj4_=aUo)x_Q zanF*7;@w=Bce(VCHU%CFXGAl9Z%C$abS8X7hZgYEn{#am$5okI+OqlvKU&7h$U`%B-TsH|%d3FcohD{0l#Z>=YqcMfrCO+uq_(X+vzxQw1jIueDmF{GEi6WTD5bP z5pma?#6GN{JBaQ44{imc15A!3kntO3FRO`A7gt<<2YTV1-;gmc*5Jn@%`PsGXnj>r zVG~qrwgDzq4z$gR_T24)P|9%ab>8%md}`NaaJZE$+EzRQ6B%XUmj<`$dPl!NFJ$ep zopX~_o*7^+y@EFxUv}bqU$=OZ*+k6D@5@i~=8fy@5#D4kj;eMc0?!|(DZ9=Dv457P z9AFu8o%+hdG-WR?i2buPWhh-i?184#Lc^u%U_~^V-&==c@Ek9qOO>1rN5Gy;q0w zFy6T;eN>0~@Z>X9iaJzBXxN?d?gKL-ulYyh-Pc5yLnVFfE_wH5Jo1`J$I3h(!J^)wHK-2%Vja?d`T-7|9X!}Pq9$qMf41ttXJt-jYUspz}#>(@1dvv z;#XZwd0CBp|M2kgWjFKQ`-d~pWjB-FKe7||{^364WjFKQ`^Qhf9Qf+eqpG`lMA z?8Re-mEy?zWmoE&ykGT&csed#SSgOYGps?(uo9fIMH01F_%k|N`V@Z#Fz&a*os>VX z`$24%F~gm4_|wly#R_)Kf?k8Q-2bz!DX zkfuIT=b>~_6iu%HsbbUA4f;e}t3Q@5ilS*asB=F}z0!A|p4>EeJpC4?r~l&4bM*9m zBHrlb@$}mj1li;C-25f#Po|0P2*uyhpFHIrKNKIKKV3xWG5yI{{fV@2SbzEv>xa|A zq_e}ySX@lG-=E41-|Zp>=D}1Zz1tO+-R-_|dpMODzT24!ej=4gBv0^**oi-xW*6l= zP%iweoKwGu2X&rHvx{=>r#-UI$vJ5PLwJ+x%jeSUB5yLw?a4H|$D70-5|H4p;!WPR z__;JY+MN_?!)quQptDk#txM?G};>SPc$fN;<4&iDOjw zP^KqnD#q1c1P9@FkElv#PtlaSQm=t7AWee_1yrTqdP-=yNA(N4q9fD zPfOChGYznsOkBU{*THJ!YJ401*QSZpijnj*v2D%MLd#k5Q|{Q`F_zBsd$J#1r8Ake z5i7LR$;Z)|Jhl0JN|WhKp>6RkojH)Ohw$tsJ;gHW7tu4?WW7Sq!A+ltUuUmL5r33< zY$I-{t`j4c6gPL)xw{EL+z_(bzwHlkTD}IlfO78pJ-0*1_HeDIgqCvRX<3T}p}Sm7 z{IFdVQ8{zPF3QG%(6J?709#;o(OEU|6}za0O$U^8=*vvv_2n)de5v*l&zE)e8+gwl z-O$qjQOaKPff52yDyIR4sP>Jd!w{Ep?v|X3^FUrC=YBLRHc#H_$iVWDFXG8SiDf*V zY?aet6RnbN6M;9Oq=S|*N{4lH=?g#OR^=sgq;S;|XmUMV8TIF=mQb{^3L*pK%ja4e(URoyka4%*n@#A?8 z;w9Qjflg^pL1ZQ76ZvC(x37Z8a#!j(h?n9PQvr|Fq@=@RWh)i1ZAo>K4%;R(6jMP< z(HBZ8Ev-yCN;6cUd?=}<86}+C>%fWQcs7O$o|az&jYYZ&1A1NojYYaL<>tPo;~dhJ zE%_p7EVldbaltsXG8J9tjO2HnJEQ^vpHq?#flrByD~XkHtGnS{;9n!(DA-JGyhy%L zuz9f2$h<|L}Z|FtYbaSr=M(JlnaC$vV%Z_`Q_XC%edUzI?uOACq;l zcL_0>$mru#K;R82`4D(ILOK-?_$SJT{x%3aos*IRTF#PBOO$PRRLwl1J1^wUp3$Ax zXDH+z>A?LrG(!)#o7X#5B&$njyHZSkm(ImxR9H=b4~@0DHN6mW(qT2TQ}SVmWL|k1 zQtofJ{G(r=a+iVDlX?wye%WqfFaAV0In1BetB|ho=fyH24%>b_oE+xQa988F{3+o} z;y`zASe&$+ZKGS1Am9)LE z>>jlC)^M_3Gt3sZhqtyb)J^^JHjwkF4QyYh9C7JyJo8K@_=?g4M}@PW zEk;+Ad3bqIt_K;#o2P9=4Tyo4CDVSb%#-%*-0(G9aZPe zQF|F~ZS>f$RISFZ1_wfB!Y{OXa~fQc-yICu8f3mXRv9^Ux*CRqKn#vf^C_&4V$s$7!N zkfZ1u$x1Xjd!@dV3XbUc1h&^JWV2ki)?)6*=AN@zPXaPRr+V zk0NjKIMIFii1)=NEwt<`4QcKM>*IQm90JFHxr6beF`{An)pw9wK> zJ}p5ttKWvuiZ96}menIGXG?Cms$4&Pn@%bwko1fB-?)ue;Wnx#0bHI`tU);)d|5pu zo-ZAxBBzBf7fI(!H7$JUCY~>Msc1WKQu-;Fezjn6PF{YsN<~><^`!QaKe4Iw4KKIz zhF!YPV6;Ll+h2jv@@I5~*j7t^$-BrBD#y%~|48?xkVR zNdtTVcr%~;$vE1$X{93Neo~hAV=}ov{+hHV{@ySBy$}kK@4nwJ=O`YAg!o$hl1c9< zci!(zxxHZW2)p0ULZZQI9tU0ObvTMU9=mreU(Dkh0l9km=^&Urb(YbFe#ocmLTE}HYhJME)i z$(Mt~^JS@Z&*Z>gIY|t7Gx5`}j7Bjnv~&_bNvu9}vHBn#(_G*_e@lLOf1Ig-iGX`K 
zw`$W8P~KlN;gaa21ESiJCmCGq59(sCB!@GRUMcbh)T=FE6 zUeuwtRq$0E8kJgNFaClqh)jnH^^kX?l^fmfDE{=8d{w0fevY&HV=ky#86;kTBV~3Nlktgl6?*nNoq*6^OpROs6NtdG0dEF zRDCA}RYp=RD#{?ZjS zxO>o+pN@Nb6&gJ3;$N6Iob2$j+8-o7y!=>qUiRVMstZ^at zb?3d&-Rs`&ydZ6ouiUn;G6;Do+vEA2z?)dnF}J|W%72KoGRMu`SW>sF!7m;a9NS9Z zQx2qZ2S^_{f~LsEZ)&9jzsGy%M3z*Yl55rr&SW%n$xPz9&ve*iXfjqE&-17X%fW8(4FrFD(&)lTD}S_{!ZO&{H~(!k;NK(5 zor2mmY~x#XvV5nlQNf0PeDN5IlNa}Nr^KhCM3&B0{t zXTJ?NDs8k{d?qliKFB3KuHH1J*6DDq>rbMfbJ=#{p>s+~y3P&qiSIghNX7K4lX|1P zjh*<5@)N-l%eX#w6;$nVn!_n=t90Zjq$p=QVL1xCbkmWeSjx%IQH&-j$x&!-;&T*p z%|2Nv$x$pj$?udVo4s`8D70+Sa}-T$nig70iKk_DT22WqbIH%0b?d`G*L{9bye4mk z^z@b6lX)|Z=OX=ZishAdcQr+s)T{EEcax5^N4sw%KkZ@bWAox&l(bO&>Mi-9dhjtL zQy)o0)vxkNN7YA{StqXA!%-^5q*r?^tEs3$l8?!+LTaU>N&m9wBwk;_(#5p!rIY;X zIFx-$nvA+xZFF)|(WD=AX9{K$gBWDzKzF|hfsqO%xO`0hY~ofrDioGQDfy|*WDk$v zE#fLhVuwdqVsng?$ap*X)gE;nS$F#iz3C5ge1BEvIi4o=ulx3sdXw|zYr3S#p+uKg z46lP=8BQwZk1wmo%3Ze$PAXFFj?$NtpK{-6jmP>(op~lm2BjpMtriIc&3ON!sX0 z_>)Joo_Fn8+%Dk*4?xzFKky(Wp?a^WU6Mof zem7}e?GeAJ^sR2vyscUMrqZ{%NqbA)(^UFaH)(EP#&0TptD7{Jo|sLgm$IA8PQ2l! z()@3W=x)*>tsbMP^hvFdmv`YAC-C)sIj{JE2>6!Flqfhk2j&y&R6YZynh8ubqfAvI zOeL3R-~_%MRp-s?(@cNn?W0z+RLh+LPEDPgqwY2pC6>UO zv*edpmT)50BVE3J#grl0Nx^{Y>Irx>znr9_WN4|Rpp@;N9Pcls$q?|6HVicV>= zt0sOx3{GacmNQRM9#Tm`97^r7-*J-<+wP2U z{+2aQ2h>=tbdnD>4m3wibVxL#CO;%vw+`3<9fZ`1+ZFW}cm7#+q{MgrnZ@mj`papV zOFk{9F~gz>nWww#yd{2~?#-}hV#0WtYiXsVgz@0yB{iFpCjHC7F%{ivtaNl(QTx^8 z&(q~@eJYyt15M@RH|gJr{ybx%7llfYro2DVYeFSzU-4sZuL+ePO}PqwMW`et5csT` zcnG}QFhRr51Se(pDtaB(vNnr;Ddop9S(l*2)~!c8_)j`MWz=XvgGtvH*eHYa{Cb?7U9J zzyn8JR+B&QV0+Je@?VK;)Kbt`{+R+el z?UWLi@z*r(Mtb^wPYky+UKL0@h|*o@r1R0y*W4s2j(^0{aj!v$qRqsk1~J^El%$Zf z#HWy&gGtQpKMCMVC-HpQDDAY-TRM>7(kSAV8KGA#_}UB9^6cuiwpyq|d5<28+a zm8BsVFxyvk2H{OqU((ojmy)!HDBN(AZNv&OSiCmjvEXQ#wB@CI4FPu|0x z?hC3Egd6foxakdBzr~ezex1Fhc{j?t-@yA~xCwrr0UEB%l%VQoiJwgjY9%eS%p|`; zA#Qr}*Qev!l|kauaq;F8CuW{5&{RwQJl!qm*Y9+M>f0BFa3v~Ke*lG-HSF3h9ie)- z%hM66$5yYrsaVn*c+c{QZ{U4MM{|sJznJ*u7(TR?r-@%voq+bd06cvXO`m6TikU#Z zs5$}dd7j084o&}F+b>xwXm&iLBXs^7LnT=7IR$|)HuE6 z8hazx{pQDGfhNg*>x-+9`xFE8HhIM1gx{LUZdh_q?hHINkA8|Sjn$;mVZ z`3vEj2!C_Ke}fw#LaW2QeEQTRi#3_Yqk>}#@abC{izC)5Jkt#9PIWcRAY}ado*hwe zOJjz-MVYDuOjQz?szjJdZV;xLy;rK4*0PvuExi`w-p}5W@4ez)u7<}^mHzaX!#ii_ zOo`B5XMcX;=Th(}`}0i7xfC$=m)8l51?Lqf>9OE(J#S2(>{|2pHSlitJ4tWgy&R;Y zfp`9y(zd^+f%mh+u0_21i*k|NI}>g0)l<1hzRTBi{Sjs%1-kx#UkF-#nT}@Y?FY-+ z{?=yby+?9Azov^)yFW{M7bT%-V$x~3N`6PGzCNn$Qjv#rl{Dlb+w|sax~V7`0-9En zUozB$3)KNWb#LW;64FJj6NJ&^-66G_Wc(rMW=rq=1u>7`;2hg)@$Kg51& z4|A#LEa@tJCHb8tU6S4$tC`72Wf7QZIThtsZZ(tq@~i2n055b*MW;07^op*C|8F~` z$@MPMY=`M#y_|~S5pGpV{_qHGy-G#N5b&sz{F0&Jn^7M+W+NTLBb3v5ndhrAzr2oaWH0`rvWy^nA+D(}!_O|z`fHMo;Suoi$sZnJ`*y(HzU)TjTooUWI zq$Kp@D#;H$*}#+gR1A**dUg^&JffJ6FrIr)YU$U6@zh=_I#Ricl=zNRX{k`SvqR!9 z(3i{NQ7NZ{FW-{iDGgqA2NY5oARPVNXX=u`)j!?aNqbey8q57NYxund@ zRx(QRXJ(hvQH9h=eihQG-8a(0mrm08Qb`M6W|GdAx2rdka`y~UaQEEm%$pd#l#*Yy zsf|mhC*LNcEHKq@_mL7Lcqf;9BzQWikkk}ZA<5aa&~ladO2cD1D!Q^^SX}}u4J*ZC zO8By0O@7DI%v9>m#rubsU%dtfg7=UA;!nar2HY3&17xdL-9Ic`@`9mc$*)3E(@=%9 z*T&5+hW$wVpf{jtC;5Zk=4qkjAo;X(CnNK~rAOatQte=tQ!!A>9VCCC7TQ9~X$}$e zDXBurrlAT+2foat0}7ITWVqJf3av=6SWO8SqznA1#)2m%B)pu0Dx|k`q}-)j4ZahQ za*ttu#*GEvaToce88KaX?Piw-F5|BW(~DK^@m%@MND8&cw+l!3_kJ=~_aKmtp3Q8S z`QPXQPbq)2oC2RK5j{}C_*R{)Yt{=6&X;P93O4-XFFWq?CCAF%l!n&C$(+=3=BPRu zN9|=CrN@5$`M-hm`*BW%V*p|8XTJ@2BW<+WzP}`)5Eyj8#yLD(lTteX{n$VR*d>U_ zsB^n>Ihlwx^pA2 z3)vxsChIOp(1|EG)hThN!aJWWS@(GhO;up38D*;CM(KABO2s&MqZioETe+N>bwZOK zlO1ZfZg6-s>)8%9qHf^TanE(Ap$?*&ndW%$qW)kRMifnXf1=lP{Xv>? 
zYeSKq9c;~L%50(+b^Q@)p5^*{t4s6UWMpohjl2C#!y{ZRpZwtwz&tLT{KEso|Moah zPDy^8$nQ|KM}9P%R1Cj$jZ^Z6-%dtvvG_Dze*7*zJr6HmyRLF^O51pO|7-k5XcQbS2X`*rooHSnnJrh zVqO7X!lO%31th&$rIAa1vr2uW+u&)%Hm!bjI?i}Yemc&S3eL3N1$EH1(lHB3byta> zh4dENt-5U!-R)L|9KI_i)mvP*io+LPW3Hkt<)=2EPItuUItPEY9!PEST~bH+G98my z<-R5iff6F?*kv9JJph_Lu@M!D#UUp^`R@*y1A)SYU^WT)*sb`UQh67-b=XDc5d@&pbuwj)M6LZGxDC6+ZseS5gtJ#Vcv0&4 zw2W`i{DLL7Q(9^{>%kI}QSAVPIq2n5U#L5qluIRjWLnzjZ8ngGu#@8q)T#t@1XXLy zu&)*z*HLb$prr_1zrdvWo{okYoGbUXDGyi`h(AHuD+VdLN_jAwO4{~r4HKVp4Cie&tk1UwM*bU3{M~voQfAXpfsg&_mFBQ4F7XW~>P$feXt@qNQgaBM z>#$o113j=Doc2c#>DhssG{4cvW;mZN-e%&U^>9Xj z@#PbP*25#FInM3zLF@j6b&D_@o0+}XIITsg2fPR0+L`s)orBC(ZGvim7q@jdKAd`G zZh(IeI!Mg~IE3(JZP;Qw>@C?D>A(!&yYxUwdCS5Dn~4sSDuKPtu?ojD!@K8*N6VX4 z3cK@O+M7SV0F8s}Y=cZ&C;~X{YuW%VK+rDL3!tipE>R;se8E_kcH~cRU4aBRtunqu z?Jxm)nf0Hz1LA;?W$Hs)hiA-&LY=%iIM49k+NgxMibe@fQF;E|Ch3 zT&wTRWM@CSaIe&}pN3S(8ek;;IVv1M0Oyn5YRmN~$a)ue*;6rjF~e8Ww;nBIu!r{M z=m?Jv!Nx3g0==}5sjBJ*=t{K~nhhfp-prbZ`Lb8qfU&ZLZ#$ZVrlfN@$~KXGWSWjN z-%jf&@mtXmS8u32q#a#18e@qswN?|t2A3xQk8 zZw25t!r~iL1;B6Lu=Eb3YOVLOa0IoTs27<+B)m!A0!%MH%N?_vP1MlvD7+ZlGuL(E zHD$=0lsk3eHD$;XUH9okP=*;$hJ9@eT+u6)46s)4^!s%Vj|G2cgGbbdZ-=|(drE3Z z@gt}T2%RdhQ!-&>T0iB%Jb>eZixyrK@AA$VjB?;j_?*IJynuJ~im~fJ0&}ar;*WYJ z0iG}{Tg%`wpj%OBUBYPO8U%|@QnUUINz|wCZ9Dcgz;iiZ`3-P@Wx}pV16p8xr7uS^ z7r%QLkNMge_;jb23D2P(+~G1zS0a&sZx;j*P$uRosuiMk*piFmC7rnQjFTl;6P~pO z?&*lDVDiEfOgPwul{w;BDUnqvX23e2>V;Fz0I#AwggZe6XrZO?pLCCBfpyDpY8f<_ z26$A`a3$AN(2GZYuLC0q6KhL#q%F{VhlU`XYFXmOAe=A`z*8E^U+?fuptVcHXm)~w zbbzLCQGgqON)4UnI%@h3PT81&M{xw&3YRW217V)wC@)%Qc<~BNyWuNF<{Fs>K7$G{ z;~g!3LvE{kFWR6lY9KUZbn+t6q(R_J&w(d_*4?`KJ?#~-p(#V4st}YYq^DZn+d{cBKOw)CYi8ZmEoY=N)+vdc!COXl? zwr$(Cory6q{(0y5F8-^owY%4@>b_}JZ3O_^jM|NzZ)_a}g+JnF*TNpDm#<^|wM&8>?*JaCSgd7IsZAkyfrNP`&dDfX$F6_I7=#kN zQ3z8YV3?GtKvz_2m#Y;JL!f?+dyadKUP?+qN9wmo?u--qP9;L_ZZB37) zhK1)g4f%^bMG^TUtf@DHmiQqR^PbO!4Ll_j3)@G1%wVATyh=7Gco*nGrziZNpZ=8gu3Sh>Q+E%ZGYsPl%CCsl*9#BcxG#)q{vN8* z5&xWu9Y7<_36>Gznuk)xPBhB#h>Om^pc)-<%R_otWQ8`CIb+!WT8}V>LZh%>E(f7E z)aFpTRO&7hwb1&C2%KiDyU(1T`{2y}i!he<%H~p}{s@e!&iK+Q?h|;YXdc7}H@^Y# zom!Y!@Ft}1gJt);wrM-g$sVZX;=&WbS~~?S>o?%>^-{6k_&!{yp=D^u?YnV0?I%i> zZN)={5Rfy2HCCslseZPa5q%t%@Y#d$IGw_vvNIH!+uysij763egM$ZjtF(5la#^+{ zp2|YmDVJ|iYp^y!PtwHPdqz3BhGY9UZG|0Fl${QmGIH*DxAV#}<6X3Z zSJiZ2&1V25+4bBwrm)GLO99cGwJ>-VMrTknyuo{&_m-pteYGbiZ>wMMw){4Ss)`+_GfAhgZyoo{esT&WuSVX&{`;a8JT8YL z?7c5VuoZjLcoh-Nko*u(LUsu9-=?E7j|C5awJHQGDp-QSH5A!6PT5-D#UpUy-+$@> z8AA%!YSFRi1Y;1SSObQ`QXhFEzAAGOqkJ$lGk(Amb#gS)Nr%uNQED({0#;_*O>|LX zEZ^!7m}==D+>Uo`bYy(23aMc?V>&|yf*9o>w{63jW7}j5i<^+(SFQPQtooA3mm&v} zb!(<&n-|0#EkyakTTSP061Wfc5c7s=f%x<$AillK;%MyLZfgbQ9E-PDVnK0m*jms@ z3aWXc?#q9wQpJP*R=tn{_@VI`(~OXnt8)a)W#McKi`cm#(etqawG_7tIY@pU4q&u| z8Z$pGxeezQ_4i~b--wHh$!l)^*)#v**Vk3o@A@PkBZ3>cEfEnzRXZfygB6r#%L zxp(Qh1sPqHA$CtaSny0y>V*OTapPGRNfQ->gqeKE`4NW{@GIR>SFaK1<)Mm|Cmu17 zv~y1MrFJg(Q&J#LQ4E`4UF9q!Y&T_9{4gYXmGIe;Gowe6+eM9hnK|9`dEYT9WGWE0 z2L%|i9)r7Vo9}LQJ2#-|FqUtQf#i%-(rlS$L_@IX0^Z}-Bes+;L*X1vTgcZ~R}%V; z`C~O2=Rr*5MJiEU8`q7P!we`*!&@qXGatGjl-ad50qo%E&rH^e$znA&N$A^7Sl>gH zA~iZjn=F1;Z=qAHdx^35;ZbJTw@}J)E7x5L35E!78!UzN^N2qQ3F+Ro6!~xJug5CS z@yjjNizD1mY*TsxFOa82FJ%FgjYOf;R@%^Fzt-ol&4p$3D}2r=@W_mn1n3!CPcI9j zSHu(xcci*SQREuKyI@+g)e;OJ)1s$s^E>WOQwLb3=wGdYj)ZriqxA%kb+E|KG9Y7a zVr^Ju#WD$st{|nueTfPyiXkT{U}t(FpR#>#?vg=NR9-ulT@o-57WDLL}ah&yLKcQ$eEB1o&ev`6CRy{`%5s!JqJ{N*nhazD&#vD zzNVEr`6X_buWS;yzAY`)_qd=l%qVf{!!<^8MHO8%PW%NhUC^L73 zMyb4}ycojwPzY0R&pYK}wJDRuc#@JQYn3cG)mD`eaAX%DY55Vp#M6JZDw=4(A5e8o 
zc*$HLR}8}ieNmM^zqX^*yW9p%UO=h^p9l(|qI4QXVR&oVxJi2bv2+FZL=7lQ5@tktLT=yO{Sm<0`vO24R9%`g9R;&oFQ9PVWYs9oK8YfBPdLMNEi}a98qU z#s5szjCvT?#;O?H>SvD7>YDJBN`HVc+#cGn71YjP_ascC0WU`;pmot@n{^}#bP!RL zH>XOU@@O?zNwQCNmHmT)BKW${BIT2nU!Zy@TLBPkHaY`=u*DxN_g*3*BJi7VgFn3) z-8EbW4D4$P(!wSfme={*B&k=fOa?5{zKAqT2Lr&WAS)pT0z@ z1c0!h@&`oT<$ETNDyiWeN1V!C2SE75|~zh<7ufZAY=qH1CE}Y4e$E)T;@ua%%6#sB6Jk1Zp1U zHwFzU4^MWOeMnO9E!O@^XfKLCd-g$1;6JMkN@T#f07y;zgNs7v&|IFO0xNg?jv8}4Tm!cy7f!{!okBl1p%6C zYQE!RRT>cb9UN7rQs>l@S2ZZ|F~Q!17-%*C`BVm;Kr;hxWYXkS)?RyPUu z6HTrhpU(!tMvAUDXbdi{G8i@`Y@5hUB|u^*^9o0m!59X|uq6)0FY8puu=p$9TlcbS zirdXDTz2H1&k`mbt4{oIAa>_G7s{)5+)@Sy=Bc;zkK|XF$>7^ZFAy==TK|Vk;cCMD z(b+7qI7^?=eso9D6qC5F=9y|)pEKp+LvH~V8U-Xzqj{8AhfOC(wq@Sl5ZvP{qUI}O zf_kaZGWYio6vv^=;MuikFz`AK*INWn2#O>{@-+$v>2}OKbE+Fh{RkcW?TNQMFT-4z z3OBCRgfyOay_%KYJTxe=Tm*#k0+G};UpV#BwtQ5=OD-;%4rE~5lC zL`q>(Q#bs_*dc*HGaF$gci(u8O>Ef2mUC@=+#hywza`6gbjvYb>y{{MqZl>=<*>r>Kjq476yv+&JhNtJGiH69Suuv@H zpXG=#iXChI{j-U@kiPlQkD_0sQ*RP%qoedNwW*C`JP4npfB`6&hZ99Cf#Hb+?$##x zB?lCvmuL~)O22u5LO2AtSTJzpAbam>U|x76CJ65x(eq!xcXI(eI>qzx0JiZWdYGdP zWYP-=xiXflzX8bJd|+@A+K3c5VEFIcBagumW=!y-C(?YF2OMm2LN8c1bhbxGoAm1;P+U~`Xt!=W z0;f7e=s#)G-A&TQ9d5>nk2LlwMe;Q)ON*Q~r0GS&bf? zy*d?S{{wb^1g%GFwu)XT@HN}xKZKItKSX%qvGp&)wcexd6*7#a(E;H^%jwsUtD_er z8qV>(c-ACF{KZqq+Uv`us1PZ>$E*aSXa^@3&P2e8@q=sYr63rS^R@H*_l5 z=fUFICH?p==fRY`c#-I{?j^Z^&r0fXlg`0JkuE7R-yr2wA(2j5K>f3<5jspsAP}y% zw;3iHoQWzYwpNHuCf+XABV;d;_!+9XaXqKan9}lbj!Bn<;*q;|LGSuIdaxpqS=z-t zUJEFXZG0M%Tl#wBL<=o#DkBQPjnkmd(4fN~xHQp3 zxRw8S?f+N7@N2C~J?hA>>2AjVljCE-yz7(kF?#BCKIe(>{3EWr;UmH)mw8R%EAQ#{ z&&MssOUdVoY1iBP^Q)2`F0M;mwirL#C$Znl-?DIywrC$$<4gaG?w_MQ*f|0*kxXCD zbsGwg;ioeW&4lM;`y{<+)V`A1GGj=`f!z1?4xL-!s_PnhS`}*DxmW>?`KRP~E z`u|rJ-qxJK;4y-9aMQHPlc12}=kuT3r9WUY0=jdhgmo>=(Na0Az5(Hy`Bxvq`t{fO zh(Iy!!VAZj7jMSbYt5N4-;49-t-ogEp%Y8+-}{8i1S{W_1#?v>w=#2Maz?k}=8{fE zA&r>?wpv`ePEyjh(W}z*72WL(`5y?ohk+GAoT->YzictTqv2)*Jsb2f-#lFB0p9aH zl=YrFC*Xid-Okn+WAdI*Sbxhv1$K^@->0kTC{cyYdt3TyD!s8iUp)oRB!KrY`C=d9 z?#7XlVFB#00^z6j3j@Y5#)LkAv`WmR*-13-dXpQYZACFza`-8eqW1V5dL=VCj%%&@ zkXVbzx4(In^ivIwJ`eX1nI2A$dmp%j1>gixa4aRS-~{4R$;uk3ru75!+SXT_^R!_F zvF**IRUDH{50mk?`YM-X0s# zkLQv3&+W7g?=s#5_2UxPt;$gW#+X13J)W;P2|!{24rj`j$}rjJnEnrk?)t^Tsm~X) z4p2k#28DI^2HD0C=6gnvUOlF?i9xO3bs9Xm<-!eqC&w1hPZd$9k$a%i53YCe1?@2W z%+#*YiozuWQ>aJ=_Xxb9H(5~aMIL3_lN%>DUw}Bp+!V5;5{+0s9HqJl*hBD9&j}+y zm}h!DpGMOb`x>0##~J@Y7lmC*=4N3q5xEqTHpH)@`y!XC5pd!mU%6S?Hv$`!BGkBt zup_5k^|?%g30Ww|PDHmKJ-@jxJNe@m7t2b0ixyx6x`ZwoG%L=q0=&pzs!0f;F`~?K zyhWThyEUhBPGFMrl358`!@2pcgDSp%nll6L3nKJd7iK8QAvY@$Pwo?WU8s7pUYaXi z5)42QPppsb5~b7W*v@Oi3)CR9_PWXJWT&nuzZ1m}a#Ck+*iL_4@^B8ZPj#D~2bvM1 zAxW5X3tywyJTa~N<06vuG4FJTy`14k-c|v2Ei|~wlJ|8c#y%sfxjfWiST>(vMX7Z{ z@$b(8D{D`kK)XLq$9=oT=Fht0%z+#z3hzRo@cv;8o)iI;X0XLRjjUNlbcfP^ARtIO zy2uXp$F#|)7Umf5>!=XmaVOossCHJB#(y5zD$=oPvW~aCnfB!}^;nhX3;w=StTATI z$$)~4*$OaI0%uU&Ho0l`VpsR3$mQrCxC6*`DDP!v z7y0WUgF7?1VCcpK^@}6BnfASxjSg?!95Gd`UUWXv!TwBp27#`z@H zfbBBByi0InJx}%%ap%QIK9A$wL2+~U#<*Gpa>QmZyJ_-N;*MY zU=}xO*~w3tLHw>h{TV9Vvy^i=cJ;clWS5lv)9{b?;@=j5!L!&NMWlMD9sBI;Z8+Cu zkLIAabU(-~inMm6Q&%m4w%b#&(u>4#GBzi4MPikEA^z&IXEsru%)K<=&j$U@Z(f2T{osyLvuWFWK{iG# z;mVSyHL;0`;Jx*7rFh`TXQ2%BFE^G|qeW~6c;!t)q!sPjiDNESUwLhFtH;@NdJheL z#wV-aB}=rQf4b8x`jq;*HC%#^?P>h3CbD9zAR1|xK6}TTnl&~!$z~rAm&FZ%xj%Iw z@u~NiHv~uCXKV3L^9YSZ8Uc1`;_erD?e+K6ty;MvL-tMx2q}jR6jbaYnUsNfM2R_N zdzSp^_h895X>gF{NyH81lU{a6JR7*@#Le{elBPYn>n3aoJE9;1M~#Qr-e4q+gtBz; z+3=6!eSU#>t!Q26_r`X%V-aJD-`G8CF29(WW{3j8`Rt9W#ynp$w}mWbAicaa`bzY( z){lb3rse@Pc&1MXjL1uh8v93;3Pf~)n-fP<26|@?Rm85P(%7QadGd>O_94rOh8+?c zjoX+R0<}9Bfpd#+JPb~wJcT#RQPrtE{O3!JL-TN^^-|zGxm{B?-qcz)mCLxww 
z1HxQbL3d4Lel|}2o`2v9L=5CvtyL`7^`Doc3zqwYd9PC5qTX#GJ#gYhZk`y|W(}^~ zuXmwaf#47X-ElrnEaBh41K-4#js%IG9&4D<$PNiUhfyo$r0 z#2FRFC>k@v^wF)Kuu=~LeO==G~MbLUa#TVBLj zS|i;75vQVZ*HRN}U8d$A>Q$(^Fg#i2>sAx&#l{H|eRQ+uWENXW1vFC{jJ5sX5*jgNA(p2>Q4F^Y35p-rrE?5mLqH(F_vA5fEGo?%$~5CR@Ij zeEjUn@*E|PUm-nrc~a7-3oe#neQocsI!0HfgIE8yXH=7Qto;;NP5vVegy@WEpS^|(Fq?*a0{0J(SZn8|bfc79U8LXud}m~?;!kYV z>Je$+-~U}-vdRX^!w0gAiJ_&Aa|eTatu z`L>w>i8f$lTwf7I$DrC*re&c3M7@Os; z9w71ZD;`8bk<|CVX46x$R4V5{zXNP9(4q;r@K@6B9+lOR$AD`QIc00(cZnF(*?fps zj&Vu-Sz@rGUiY$}eby#I%=_&sP?6GfunGN=58%+OxPf>fif>Y{YJR;|;Mbph3wWzE zK1r~}LcF37HHKs+M z56`pkVYcx?!)gTYZ81~G4Ml;x*ghSsEv}y^uis-hxTZp-)gLnndnRd;5mQZ!UNN`K z)GM12$>3*p@rZjPb*a0opH1 zM)Y^SBrmR4eJ`%H2nl<--B3YT{`^)lm`=3=OPkwHl3rP+jVgGwaIhX4JxzYOa{!-| z>;Yx*XD}2$*B~EKV}@Wf(TM9spsd;f&A`k^rIKi3env?1#9~xQr>*9aAPfRAPhsYe zOZi={SEqjIX9X*qvheno;mZM(W&9%m`{)^$@xBGze0y;{(+=-4Yu>5XYP z%;pDfo(PjRhqu&5$hc5*>A``B!hI|Ed~d24iPu*$%R?{9R5n z6K-k-t%ET^rm>;EZMPXd=JvE1LiB4UfYWQWV=?%BvEeh9GjV9zn;}4p$(woD(jJ;D zKe(6d0(W}kQ~YZEJYc|NF3lvBg;RQt49y1R1ljCDtgt~uoOp&&S?uv!LHwD_wgw4{ zw}CBguO!@IQ+Bv1HdF#&^${Oe2blCD&RAx5xUj9KbX~Qk?9OvWec}&AZ9N(4Z%tgZ zL02lA%i>8ltzihp*YGqTDfxxXd_v$f@1qwACgK9O)k&}&_T#Eav?)dB_ESQaWDfW^ zV+g#4{S3Gnmhdn`=Tji_4NNm0WoT=mjce^xwMH>r+3H62@JCA}HcODjY9d$|1z2TM z`$*xR5(~X1+iOOvf&g9H6fpT1kh1kWno~)(C)6b4(;p()v(#z6{y4J3HZ{9ZUURq^ zLa#^49@3W}^>W`HxGJG7WD^OdUG+<5-ylvI-vsfdwfDZkEi9Xf*uiro|y8S{|UKgT(Q36btQ z680i_7%fC=?eN)zvCEpBc+!CbE25g4i^4+f6W1D8l89qxoh6fYsiAyK+;y*w8v!en z-^Z`&bs$EDVFdPfO7PG_&Ov%BU+M|iQKHrhC}CE>UQ!4kT{yQw_8doS*>z4lZDgnj zJ=e44-^vE9-k5Fl5c7SEza*Ax%?i^0QUOl|2d3Pp6`#z?f;7O#CQkE|?9vyZ4E+vrRh}D1IL5up2N1;Z!ZQqFvT%&F_%=@U_3D^U4jiWde%n5u z2jg|mU0DerR_7br(bLChI(jy)t_t;U>^k|WePZcz-Z?h#VF0PUI zQT6LX;`SR6I_Eou+Jb+8$UzWlcsV#QpQQwj%Rxg*hILx_P<@*R*VLRk$SH=%(d7xz zPt(^zdF=2oU?S2$hbqxn{k3XXU@#fT`1ep~ zm~ZT;bFGR4Eb5lI`~=ReI|R-k^UmI)7|eweL_ZNCgN-Si-<5@CE_De=bI68hf-hH4 z8S=a;H1q^WO;kWUEJ*!-tZY{+GjpK zuv;#siG7Jmk=atPa3w{~MRS_IE53_}mB#_-$=QR4{X3L7f2LhS?R2$WJAqDlKngZY zaaIeYQO83^uwgO8-20jpNZ0%%N!17yweqF)v;@r3gqx$XDArhhLbIerJayP^?&|9w zY54erSQ4S$m>oSb97U0k9?hQO_f=Q#p&-q z*Iu^X4mnVdIZ!E@B(bC*d&H|I<+Xd#G*TlxsH>9vmei0bjcg=}s!bnWdzEPXLJF$! 
zIM#Mb4INi4=*IKd)%X;{Ih<^muW(u;aD{J}w3d25OfU@j1Og+3fuT_ z^z?K`Oig6mEJnnFMO$6VOpi?6@#-9pdXt6q)X(sCGl*!v>{WFAe-8wdt+MOu>!bEPMZRwbTMf2&J`qSO=9GZPl7Zwdw5CN)2JIQa=#$^e+xSA^F^^XYrg6yc@@-);0)WU{7+%)J6#KerC!nNJbl}vjdY7C{sv!ZJcvcoe86cmi;Dre%= zR`aTcL|9>u!Q`W3!cI<(c0{c3I=iQw`VUMckV@%Q2UuN9gj z#TY~Xo)U$V7K+w6BTUH=fbCKZdwyBg;*ciwiz=js%l>_F)&X~er~@9E-eY`Gg)lJwrKR0ld z{H)>0L)3s`cE}Su>`pO{=~)gD8mNiNdcCtoH_VMy?kAcjM_apv^o68VEn0gqWy#bFW6r2(ioagq&v3o5i$Fsc^= z8W@sGrMfN@23v5!)g_nh+%0cq?io~(>guY3l=Qx=#PM#{Qc}H{DQjzReNwAc+}R6Z z*gl_GV#a?mDU3UjfW!Y)3GJmXcY-);eF!uhVvvw!@?H;MB~_%>a7OLXXQ%$qtKGj5p4{pKozSr4-5G@8wx^0or}9Gf z3Yf$44+nqaD(5{dQkW0yX*#U7eEC~=nK~v}ng#+2VT5>6ha%n%#s^zlHWiTfipTBH z^Z%}OLvQJ+3i#}lKf^VR#rCc8azk;Q@rjoB$J1RXsuvOSWG zWL~T52%GNb&QYefbB;FpoR)zaG6N`z)fnBP;#vA_3v>Cfq zXx}>ZZ~ikz=m##|_4V`py6s=v4-*v)s%h*M3A=ERW*DRnU?Pt%Yg>{c6CLYx&_W^0C5&cx>=s zwa$(?8e-uZDqY;YrwwoK8(oixs=%1cC{?Z*`3WH~pXO1)Pd(ht4od{DusD>k@_U$# z9|I~=ln`=DF)vGhGM&NGI1mlTf;^A#6V+W!OH2 zop$%uMpxXmkTW{cDc;0iwqVPoif7t)mWicaZrM71zv&VCAjRb}rhfAyI;DYvNfwOW zME^}PB8AuS?A6^#976Gm4Ug-Y_V2Uf$I(DvX*#JnB+NV&WGFyAxv&5T=8sgu&2fJt zzYx;F2@2H`kIjFIPz;QP|7PK=h@Tk`7Xh#CR@_$B9hfkY(aqC+^9S5*xJ*?*fL#vs zQ-PuVPyzCxu*9dzC{iU9-z2`0T`jyc?ScBdW!9K!4BoZGOh(31@${oOC(zNf1~5qM zFoW4Q?~&}oX#f>6&ZZN}3i2GGQQK8mo5KPUe1iG6 zmlcxwz6U7P2i4Hk36`aM84p<-FoK1U?IHbAII|{287}Un66+q^EvTfKdc|6zS6uNs z-u#dEd1&$kgEMGD5yeYArs5UNZF%&<;lhgygEjlKvPHTBlq9hKmv>kD-;rAT$qug*oj2YZe(%TXw z(lM$q2Q?euhG8A`VT$ZE`vG~Jh3`m!3C4MbkY*rW?*=Hr?oU2rNpiEd^A-f($0Z^rjQ(Q`K8c&1Fis zhpW|0ZaWBnD&Dj(ke7-Hip?cA&!9#)AGi7Z61N^z@CwhI43CuXbSu$q+JkIF0$4}u z;l0#%GTcbJuZ%SgwmV6sD@pj6V9D0QTy7OCO=)~qJ`L~^cHmcMbl+Id24)I|E7v&? zGab>losFaD>)My;UjU>e^7}0H-1^^LfmeOWXB?PGGk7`H6jN`=hr&yH`i4PF;XfL! zb+EiskY#bPGl(*9q;`-;eV_7KTZG={keAb+5tDR%Qzl0Fo;=$$<( zB(!oc5aUVK-V*QR*c8B9hC17vNJYd<>a@S|LoC8yHaIMd|pvfkFW$b^azD;;-=u$tY#B6-jlKbOs+xDZ{rF0RQS zc;&)!$Ftn>Y5C5oTN1I?BGyLc|t5GIJ8ecEdh>uYhfA% z3P?kd7?oEkuQ)i;Oo9O`lF8VsnYD>-$zcj_nm8t8Nx;E zY-QNAh27Vqdh4Cty2{x~5BLg!H*USdwlVtJpkiC!k5yr8=iKEGp!=lWg>A3Cw4 z#^5-zBzF9zcqsJpBeqjD9&fR%vTA-}>icKUcI|(P z<4gim7H5C|sv4<8x9xaa^CDNrgZtP5WdV9xx;?22&O}-*m5dR6Kt$1#>Qry3>h%yZ zGY_<-)6DSf7vHzfAS=g82LTqgX&fHrZ_Z18(|5ba9z~|BCh!sR*GZw3U+pz=~x)W`h+*UP)G!HF|FyA__uD4HiB z#B$Pnq6nA%fp7FN@`vIM2LO5MJL(~F$1+hlt&Mda+hL|4m#hR%Mw4CTVs))YX@ZQJ z*Bn@xLN9);f)=ITGUm6%?|&uD5)oP0?6Fr!G@J zB)Qt#NhnmDN|OM33*F3@$V^;*M@ZgHVsfF62qBqs>WPF~W(RY4u|4;X54*B&>4lyj zT5o{~xi}m!^U}6WlxTk;O}spyMJuO};u^Ccl=mMGdqsW?7!nS8r6%Hg|3ePrG~D>U zQqOmiuM1wN!*8|Q@kZW`AhMn|VD%~KX9_|7;b~1SPM?H)Fw?y-`aV>BMjjzOxGZ1^ uLM=uOQv&q)P7ZavtzE`tiW+uQxpL$_=S#nJ;XZ$c-=F$1`>_=c=6?VW+IW!w diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py index 32d3f199c85cf..5906a837e8713 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py @@ -2,7 +2,7 @@ import json import os -from typing import Annotated, Any, Literal, Optional, cast +from typing import Annotated, Any, Literal, Optional, Union, cast import openai import pytest @@ -14,22 +14,33 @@ HumanMessage, MessageLikeRepresentation, ) +from langchain_core.v1.messages import AIMessage as AIMessageV1 +from langchain_core.v1.messages import AIMessageChunk as AIMessageChunkV1 +from langchain_core.v1.messages import HumanMessage as HumanMessageV1 from pydantic import BaseModel from typing_extensions import TypedDict from langchain_openai import ChatOpenAI, custom_tool +from langchain_openai.v1 
import ChatOpenAI as ChatOpenAIV1 MODEL_NAME = "gpt-4o-mini" -def _check_response(response: Optional[BaseMessage]) -> None: - assert isinstance(response, AIMessage) +def _check_response( + response: Optional[Union[BaseMessage, AIMessageV1]], output_version: str +) -> None: + if output_version == "v1": + assert isinstance(response, AIMessageV1) or isinstance( + response, AIMessageChunkV1 + ) + else: + assert isinstance(response, AIMessage) assert isinstance(response.content, list) for block in response.content: assert isinstance(block, dict) if block["type"] == "text": - assert isinstance(block["text"], str) - for annotation in block["annotations"]: + assert isinstance(block["text"], str) # type: ignore[typeddict-item] + for annotation in block["annotations"]: # type: ignore[typeddict-item] if annotation["type"] == "file_citation": assert all( key in annotation @@ -40,8 +51,16 @@ def _check_response(response: Optional[BaseMessage]) -> None: key in annotation for key in ["end_index", "start_index", "title", "type", "url"] ) - - text_content = response.text() + elif annotation["type"] == "citation": + assert all(key in annotation for key in ["title", "type"]) + if "url" in annotation: + assert "start_index" in annotation + assert "end_index" in annotation + + if output_version == "v1": + text_content = response.text + else: + text_content = response.text() # type: ignore[operator,misc] assert isinstance(text_content, str) assert text_content assert response.usage_metadata @@ -49,68 +68,74 @@ def _check_response(response: Optional[BaseMessage]) -> None: assert response.usage_metadata["output_tokens"] > 0 assert response.usage_metadata["total_tokens"] > 0 assert response.response_metadata["model_name"] - assert response.response_metadata["service_tier"] + assert response.response_metadata["service_tier"] # type: ignore[typeddict-item] +@pytest.mark.default_cassette("test_web_search.yaml.gz") @pytest.mark.vcr -def test_web_search() -> None: - llm = ChatOpenAI(model=MODEL_NAME, output_version="responses/v1") +@pytest.mark.parametrize("output_version", ["responses/v1", "v1"]) +def test_web_search(output_version: Literal["responses/v1", "v1"]) -> None: + if output_version == "v1": + llm = ChatOpenAIV1(model=MODEL_NAME) + else: + llm = ChatOpenAI(model=MODEL_NAME, output_version=output_version) # type: ignore[assignment] first_response = llm.invoke( "What was a positive news story from today?", tools=[{"type": "web_search_preview"}], ) - _check_response(first_response) + _check_response(first_response, output_version) # Test streaming - full: Optional[BaseMessageChunk] = None - for chunk in llm.stream( - "What was a positive news story from today?", - tools=[{"type": "web_search_preview"}], - ): - assert isinstance(chunk, AIMessageChunk) - full = chunk if full is None else full + chunk - _check_response(full) + if isinstance(llm, ChatOpenAIV1): + full: Optional[AIMessageChunkV1] = None + for chunk in llm.stream( + "What was a positive news story from today?", + tools=[{"type": "web_search_preview"}], + ): + assert isinstance(chunk, AIMessageChunkV1) + full = chunk if full is None else full + chunk + else: + full: Optional[BaseMessageChunk] = None # type: ignore[no-redef] + for chunk in llm.stream( + "What was a positive news story from today?", + tools=[{"type": "web_search_preview"}], + ): + assert isinstance(chunk, AIMessageChunk) + full = chunk if full is None else full + chunk + _check_response(full, output_version) # Use OpenAI's stateful API response = llm.invoke( "what about a negative one", 
tools=[{"type": "web_search_preview"}], - previous_response_id=first_response.response_metadata["id"], + previous_response_id=first_response.response_metadata["id"], # type: ignore[typeddict-item] ) - _check_response(response) + _check_response(response, output_version) # Manually pass in chat history response = llm.invoke( [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "What was a positive news story from today?", - } - ], - }, + {"role": "user", "content": "What was a positive news story from today?"}, first_response, - { - "role": "user", - "content": [{"type": "text", "text": "what about a negative one"}], - }, + {"role": "user", "content": "what about a negative one"}, ], tools=[{"type": "web_search_preview"}], ) - _check_response(response) + _check_response(response, output_version) # Bind tool response = llm.bind_tools([{"type": "web_search_preview"}]).invoke( "What was a positive news story from today?" ) - _check_response(response) + _check_response(response, output_version) for msg in [first_response, full, response]: - assert isinstance(msg, AIMessage) + assert msg is not None block_types = [block["type"] for block in msg.content] # type: ignore[index] - assert block_types == ["web_search_call", "text"] + if output_version == "responses/v1": + assert block_types == ["web_search_call", "text"] + else: + assert block_types == ["web_search_call", "web_search_result", "text"] @pytest.mark.flaky(retries=3, delay=1) @@ -120,7 +145,7 @@ async def test_web_search_async() -> None: "What was a positive news story from today?", tools=[{"type": "web_search_preview"}], ) - _check_response(response) + _check_response(response, "v0") assert response.response_metadata["status"] # Test streaming @@ -132,7 +157,7 @@ async def test_web_search_async() -> None: assert isinstance(chunk, AIMessageChunk) full = chunk if full is None else full + chunk assert isinstance(full, AIMessageChunk) - _check_response(full) + _check_response(full, "v0") for msg in [response, full]: assert msg.additional_kwargs["tool_outputs"] @@ -141,13 +166,15 @@ async def test_web_search_async() -> None: assert tool_output["type"] == "web_search_call" -@pytest.mark.flaky(retries=3, delay=1) -def test_function_calling() -> None: +@pytest.mark.default_cassette("test_function_calling.yaml.gz") +@pytest.mark.vcr +@pytest.mark.parametrize("output_version", ["v0", "responses/v1"]) +def test_function_calling(output_version: Literal["v0", "responses/v1"]) -> None: def multiply(x: int, y: int) -> int: """return x * y""" return x * y - llm = ChatOpenAI(model=MODEL_NAME) + llm = ChatOpenAI(model=MODEL_NAME, output_version=output_version) bound_llm = llm.bind_tools([multiply, {"type": "web_search_preview"}]) ai_msg = cast(AIMessage, bound_llm.invoke("whats 5 * 4")) assert len(ai_msg.tool_calls) == 1 @@ -163,7 +190,33 @@ def multiply(x: int, y: int) -> int: assert set(full.tool_calls[0]["args"]) == {"x", "y"} response = bound_llm.invoke("What was a positive news story from today?") - _check_response(response) + _check_response(response, output_version) + + +@pytest.mark.default_cassette("test_function_calling.yaml.gz") +@pytest.mark.vcr +def test_function_calling_v1() -> None: + def multiply(x: int, y: int) -> int: + """return x * y""" + return x * y + + llm = ChatOpenAIV1(model=MODEL_NAME) + bound_llm = llm.bind_tools([multiply, {"type": "web_search_preview"}]) + ai_msg = bound_llm.invoke("whats 5 * 4") + assert len(ai_msg.tool_calls) == 1 + assert ai_msg.tool_calls[0]["name"] == "multiply" + assert 
set(ai_msg.tool_calls[0]["args"]) == {"x", "y"} + + full: Any = None + for chunk in bound_llm.stream("whats 5 * 4"): + assert isinstance(chunk, AIMessageChunkV1) + full = chunk if full is None else full + chunk + assert len(full.tool_calls) == 1 + assert full.tool_calls[0]["name"] == "multiply" + assert set(full.tool_calls[0]["args"]) == {"x", "y"} + + response = bound_llm.invoke("What was a positive news story from today?") + _check_response(response, "v1") class Foo(BaseModel): @@ -174,8 +227,13 @@ class FooDict(TypedDict): response: str -def test_parsed_pydantic_schema() -> None: - llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) +@pytest.mark.default_cassette("test_parsed_pydantic_schema.yaml.gz") +@pytest.mark.vcr +@pytest.mark.parametrize("output_version", ["v0", "responses/v1"]) +def test_parsed_pydantic_schema(output_version: Literal["v0", "responses/v1"]) -> None: + llm = ChatOpenAI( + model=MODEL_NAME, use_responses_api=True, output_version=output_version + ) response = llm.invoke("how are ya", response_format=Foo) parsed = Foo(**json.loads(response.text())) assert parsed == response.additional_kwargs["parsed"] @@ -192,6 +250,30 @@ def test_parsed_pydantic_schema() -> None: assert parsed.response +@pytest.mark.default_cassette("test_parsed_pydantic_schema.yaml.gz") +@pytest.mark.vcr +def test_parsed_pydantic_schema_v1() -> None: + llm = ChatOpenAIV1(model=MODEL_NAME, use_responses_api=True) + response = llm.invoke("how are ya", response_format=Foo) + assert response.text + parsed = Foo(**json.loads(response.text)) + assert parsed == response.parsed + assert parsed.response + + # Test stream + full: Optional[AIMessageChunkV1] = None + chunks = [] + for chunk in llm.stream("how are ya", response_format=Foo): + assert isinstance(chunk, AIMessageChunkV1) + full = chunk if full is None else full + chunk + chunks.append(chunk) + assert isinstance(full, AIMessageChunkV1) + assert full.text + parsed = Foo(**json.loads(full.text)) + assert parsed == full.parsed + assert parsed.response + + async def test_parsed_pydantic_schema_async() -> None: llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) response = await llm.ainvoke("how are ya", response_format=Foo) @@ -323,6 +405,26 @@ def test_reasoning(output_version: Literal["v0", "responses/v1"]) -> None: assert block_types == ["reasoning", "text"] +@pytest.mark.default_cassette("test_reasoning.yaml.gz") +@pytest.mark.vcr +def test_reasoning_v1() -> None: + llm = ChatOpenAIV1(model="o4-mini", use_responses_api=True) + response = llm.invoke("Hello", reasoning={"effort": "low"}) + assert isinstance(response, AIMessageV1) + + # Test init params + streaming + llm = ChatOpenAIV1(model="o4-mini", reasoning={"effort": "low"}) + full: Optional[AIMessageChunkV1] = None + for chunk in llm.stream("Hello"): + assert isinstance(chunk, AIMessageChunkV1) + full = chunk if full is None else full + chunk + assert isinstance(full, AIMessageChunkV1) + + for msg in [response, full]: + block_types = [block["type"] for block in msg.content] + assert block_types == ["reasoning", "text"] + + def test_stateful_api() -> None: llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) response = llm.invoke("how are you, my name is Bobo") @@ -358,20 +460,25 @@ def test_computer_calls() -> None: def test_file_search() -> None: pytest.skip() # TODO: set up infra - llm = ChatOpenAI(model=MODEL_NAME) + llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) tool = { "type": "file_search", "vector_store_ids": [os.environ["OPENAI_VECTOR_STORE_ID"]], } - 
response = llm.invoke("What is deep research by OpenAI?", tools=[tool]) - _check_response(response) + + input_message = {"role": "user", "content": "What is deep research by OpenAI?"} + response = llm.invoke([input_message], tools=[tool]) + _check_response(response, "v0") full: Optional[BaseMessageChunk] = None - for chunk in llm.stream("What is deep research by OpenAI?", tools=[tool]): + for chunk in llm.stream([input_message], tools=[tool]): assert isinstance(chunk, AIMessageChunk) full = chunk if full is None else full + chunk assert isinstance(full, AIMessageChunk) - _check_response(full) + _check_response(full, "v0") + + next_message = {"role": "user", "content": "Thank you."} + _ = llm.invoke([input_message, full, next_message]) @pytest.mark.default_cassette("test_stream_reasoning_summary.yaml.gz") @@ -398,20 +505,28 @@ def test_stream_reasoning_summary( if output_version == "v0": reasoning = response_1.additional_kwargs["reasoning"] assert set(reasoning.keys()) == {"id", "type", "summary"} + summary = reasoning["summary"] + assert isinstance(summary, list) + for block in summary: + assert isinstance(block, dict) + assert isinstance(block["type"], str) + assert isinstance(block["text"], str) + assert block["text"] else: + # output_version == "responses/v1" reasoning = next( block for block in response_1.content if block["type"] == "reasoning" # type: ignore[index] ) assert set(reasoning.keys()) == {"id", "type", "summary", "index"} - summary = reasoning["summary"] - assert isinstance(summary, list) - for block in summary: - assert isinstance(block, dict) - assert isinstance(block["type"], str) - assert isinstance(block["text"], str) - assert block["text"] + summary = reasoning["summary"] + assert isinstance(summary, list) + for block in summary: + assert isinstance(block, dict) + assert isinstance(block["type"], str) + assert isinstance(block["text"], str) + assert block["text"] # Check we can pass back summaries message_2 = {"role": "user", "content": "Thank you."} @@ -419,9 +534,48 @@ def test_stream_reasoning_summary( assert isinstance(response_2, AIMessage) +@pytest.mark.default_cassette("test_stream_reasoning_summary.yaml.gz") @pytest.mark.vcr -def test_code_interpreter() -> None: - llm = ChatOpenAI(model="o4-mini", use_responses_api=True) +def test_stream_reasoning_summary_v1() -> None: + llm = ChatOpenAIV1( + model="o4-mini", + # Routes to Responses API if `reasoning` is set. 
+ reasoning={"effort": "medium", "summary": "auto"}, + ) + message_1 = { + "role": "user", + "content": "What was the third tallest buliding in the year 2000?", + } + response_1: Optional[AIMessageChunkV1] = None + for chunk in llm.stream([message_1]): + assert isinstance(chunk, AIMessageChunkV1) + response_1 = chunk if response_1 is None else response_1 + chunk + assert isinstance(response_1, AIMessageChunkV1) + + total_reasoning_blocks = 0 + for block in response_1.content: + if block["type"] == "reasoning": + total_reasoning_blocks += 1 + assert isinstance(block["id"], str) and block["id"].startswith("rs_") + assert isinstance(block["reasoning"], str) + assert isinstance(block["index"], int) + assert ( + total_reasoning_blocks > 1 + ) # This query typically generates multiple reasoning blocks + + # Check we can pass back summaries + message_2 = {"role": "user", "content": "Thank you."} + response_2 = llm.invoke([message_1, response_1, message_2]) + assert isinstance(response_2, AIMessageV1) + + +@pytest.mark.default_cassette("test_code_interpreter.yaml.gz") +@pytest.mark.vcr +@pytest.mark.parametrize("output_version", ["v0", "responses/v1"]) +def test_code_interpreter(output_version: Literal["v0", "responses/v1"]) -> None: + llm = ChatOpenAI( + model="o4-mini", use_responses_api=True, output_version=output_version + ) llm_with_tools = llm.bind_tools( [{"type": "code_interpreter", "container": {"type": "auto"}}] ) @@ -430,15 +584,25 @@ def test_code_interpreter() -> None: "content": "Write and run code to answer the question: what is 3^3?", } response = llm_with_tools.invoke([input_message]) - _check_response(response) - tool_outputs = response.additional_kwargs["tool_outputs"] - assert tool_outputs - assert any(output["type"] == "code_interpreter_call" for output in tool_outputs) + assert isinstance(response, AIMessage) + _check_response(response, output_version) + if output_version == "v0": + tool_outputs = [ + item + for item in response.additional_kwargs["tool_outputs"] + if item["type"] == "code_interpreter_call" + ] + else: + # responses/v1 + tool_outputs = [ + item + for item in response.content + if isinstance(item, dict) and item["type"] == "code_interpreter_call" + ] + assert len(tool_outputs) == 1 # Test streaming # Use same container - tool_outputs = response.additional_kwargs["tool_outputs"] - assert len(tool_outputs) == 1 container_id = tool_outputs[0]["container_id"] llm_with_tools = llm.bind_tools( [{"type": "code_interpreter", "container": container_id}] @@ -449,9 +613,72 @@ def test_code_interpreter() -> None: assert isinstance(chunk, AIMessageChunk) full = chunk if full is None else full + chunk assert isinstance(full, AIMessageChunk) - tool_outputs = full.additional_kwargs["tool_outputs"] + if output_version == "v0": + tool_outputs = [ + item + for item in response.additional_kwargs["tool_outputs"] + if item["type"] == "code_interpreter_call" + ] + else: + # responses/v1 + tool_outputs = [ + item + for item in response.content + if isinstance(item, dict) and item["type"] == "code_interpreter_call" + ] + assert tool_outputs + + # Test we can pass back in + next_message = {"role": "user", "content": "Please add more comments to the code."} + _ = llm_with_tools.invoke([input_message, full, next_message]) + + +@pytest.mark.default_cassette("test_code_interpreter.yaml.gz") +@pytest.mark.vcr +def test_code_interpreter_v1() -> None: + llm = ChatOpenAIV1(model="o4-mini", use_responses_api=True) + llm_with_tools = llm.bind_tools( + [{"type": "code_interpreter", "container": 
{"type": "auto"}}] + ) + input_message = { + "role": "user", + "content": "Write and run code to answer the question: what is 3^3?", + } + response = llm_with_tools.invoke([input_message]) + assert isinstance(response, AIMessageV1) + _check_response(response, "v1") + + tool_outputs = [ + item for item in response.content if item["type"] == "code_interpreter_call" + ] + code_interpreter_result = next( + item for item in response.content if item["type"] == "code_interpreter_result" + ) + assert tool_outputs + assert code_interpreter_result + assert len(tool_outputs) == 1 + + # Test streaming + # Use same container + container_id = tool_outputs[0]["container_id"] # type: ignore[typeddict-item] + llm_with_tools = llm.bind_tools( + [{"type": "code_interpreter", "container": container_id}] + ) + + full: Optional[AIMessageChunkV1] = None + for chunk in llm_with_tools.stream([input_message]): + assert isinstance(chunk, AIMessageChunkV1) + full = chunk if full is None else full + chunk + assert isinstance(full, AIMessageChunkV1) + code_interpreter_call = next( + item for item in full.content if item["type"] == "code_interpreter_call" + ) + code_interpreter_result = next( + item for item in full.content if item["type"] == "code_interpreter_result" + ) + assert code_interpreter_call + assert code_interpreter_result assert tool_outputs - assert any(output["type"] == "code_interpreter_call" for output in tool_outputs) # Test we can pass back in next_message = {"role": "user", "content": "Please add more comments to the code."} @@ -546,10 +773,66 @@ def test_mcp_builtin_zdr() -> None: _ = llm_with_tools.invoke([input_message, full, approval_message]) -@pytest.mark.vcr() -def test_image_generation_streaming() -> None: +@pytest.mark.default_cassette("test_mcp_builtin_zdr.yaml.gz") +@pytest.mark.vcr +def test_mcp_builtin_zdr_v1() -> None: + llm = ChatOpenAIV1( + model="o4-mini", store=False, include=["reasoning.encrypted_content"] + ) + + llm_with_tools = llm.bind_tools( + [ + { + "type": "mcp", + "server_label": "deepwiki", + "server_url": "https://mcp.deepwiki.com/mcp", + "require_approval": {"always": {"tool_names": ["read_wiki_structure"]}}, + } + ] + ) + input_message = { + "role": "user", + "content": ( + "What transport protocols does the 2025-03-26 version of the MCP spec " + "support?" 
+ ), + } + full: Optional[AIMessageChunkV1] = None + for chunk in llm_with_tools.stream([input_message]): + assert isinstance(chunk, AIMessageChunkV1) + full = chunk if full is None else full + chunk + + assert isinstance(full, AIMessageChunkV1) + assert all(isinstance(block, dict) for block in full.content) + + approval_message = HumanMessageV1( + [ + { + "type": "non_standard", + "value": { + "type": "mcp_approval_response", + "approve": True, + "approval_request_id": block["value"]["id"], # type: ignore[index] + }, + } + for block in full.content + if block["type"] == "non_standard" + and block["value"]["type"] == "mcp_approval_request" # type: ignore[index] + ] + ) + _ = llm_with_tools.invoke([input_message, full, approval_message]) + + +@pytest.mark.default_cassette("test_image_generation_streaming.yaml.gz") +@pytest.mark.vcr +@pytest.mark.parametrize("output_version", ["v0", "responses/v1"]) +def test_image_generation_streaming( + output_version: Literal["v0", "responses/v1"], +) -> None: """Test image generation streaming.""" - llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True) + llm = ChatOpenAI( + model="gpt-4.1", use_responses_api=True, output_version=output_version + ) tool = { "type": "image_generation", # For testing purposes let's keep the quality low, so the test runs faster. @@ -596,15 +879,82 @@ def test_image_generation_streaming() -> None: # At the moment, the streaming API does not pick up annotations fully. # So the following check is commented out. # _check_response(complete_ai_message) - tool_output = complete_ai_message.additional_kwargs["tool_outputs"][0] - assert set(tool_output.keys()).issubset(expected_keys) + if output_version == "v0": + assert complete_ai_message.additional_kwargs["tool_outputs"] + tool_output = complete_ai_message.additional_kwargs["tool_outputs"][0] + assert set(tool_output.keys()).issubset(expected_keys) + elif output_version == "responses/v1": + tool_output = next( + block + for block in complete_ai_message.content + if isinstance(block, dict) and block["type"] == "image_generation_call" + ) + assert set(tool_output.keys()).issubset(expected_keys) + else: + # v1 + standard_keys = {"type", "base64", "id", "status", "index"} + tool_output = next( + block + for block in complete_ai_message.content + if isinstance(block, dict) and block["type"] == "image" + ) + assert set(standard_keys).issubset(tool_output.keys()) -@pytest.mark.vcr() -def test_image_generation_multi_turn() -> None: +@pytest.mark.default_cassette("test_image_generation_streaming.yaml.gz") +@pytest.mark.vcr +def test_image_generation_streaming_v1() -> None: + """Test image generation streaming.""" + llm = ChatOpenAIV1(model="gpt-4.1", use_responses_api=True) + tool = { + "type": "image_generation", + "quality": "low", + "output_format": "jpeg", + "output_compression": 100, + "size": "1024x1024", + } + + expected_keys = { + # Standard + "type", + "base64", + "mime_type", + "id", + "index", + # OpenAI-specific + "background", + "output_format", + "quality", + "revised_prompt", + "size", + "status", + } + + full: Optional[AIMessageChunkV1] = None + for chunk in llm.stream("Draw a random short word in green font.", tools=[tool]): + assert isinstance(chunk, AIMessageChunkV1) + full = chunk if full is None else full + chunk + complete_ai_message = cast(AIMessageChunkV1, full) + + tool_output = next( + block + for block in complete_ai_message.content + if isinstance(block, dict) and block["type"] == "image" + ) + assert set(expected_keys).issubset(tool_output.keys()) + + 
+@pytest.mark.default_cassette("test_image_generation_multi_turn.yaml.gz") +@pytest.mark.vcr +@pytest.mark.parametrize("output_version", ["v0", "responses/v1"]) +def test_image_generation_multi_turn( + output_version: Literal["v0", "responses/v1"], +) -> None: """Test multi-turn editing of image generation by passing in history.""" # Test multi-turn - llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True) + llm = ChatOpenAI( + model="gpt-4.1", use_responses_api=True, output_version=output_version + ) # Test invocation tool = { "type": "image_generation", @@ -620,10 +970,41 @@ def test_image_generation_multi_turn() -> None: {"role": "user", "content": "Draw a random short word in green font."} ] ai_message = llm_with_tools.invoke(chat_history) - _check_response(ai_message) - tool_output = ai_message.additional_kwargs["tool_outputs"][0] + assert isinstance(ai_message, AIMessage) + _check_response(ai_message, output_version) - # Example tool output for an image + expected_keys = { + "id", + "background", + "output_format", + "quality", + "result", + "revised_prompt", + "size", + "status", + "type", + } + + if output_version == "v0": + tool_output = ai_message.additional_kwargs["tool_outputs"][0] + assert set(tool_output.keys()).issubset(expected_keys) + elif output_version == "responses/v1": + tool_output = next( + block + for block in ai_message.content + if isinstance(block, dict) and block["type"] == "image_generation_call" + ) + assert set(tool_output.keys()).issubset(expected_keys) + else: + standard_keys = {"type", "base64", "id", "status"} + tool_output = next( + block + for block in ai_message.content + if isinstance(block, dict) and block["type"] == "image" + ) + assert set(standard_keys).issubset(tool_output.keys()) + + # Example tool output for an image (v0) # { # "background": "opaque", # "id": "ig_683716a8ddf0819888572b20621c7ae4029ec8c11f8dacf8", @@ -639,19 +1020,90 @@ def test_image_generation_multi_turn() -> None: # "result": # base64 encode image data # } + chat_history.extend( + [ + # AI message with tool output + ai_message, + # New request + { + "role": "user", + "content": ( + "Now, change the font to blue. Keep the word and everything else " + "the same." 
+ ), + }, + ] + ) + + ai_message2 = llm_with_tools.invoke(chat_history) + assert isinstance(ai_message2, AIMessage) + _check_response(ai_message2, output_version) + + if output_version == "v0": + tool_output = ai_message2.additional_kwargs["tool_outputs"][0] + assert set(tool_output.keys()).issubset(expected_keys) + elif output_version == "responses/v1": + tool_output = next( + block + for block in ai_message2.content + if isinstance(block, dict) and block["type"] == "image_generation_call" + ) + assert set(tool_output.keys()).issubset(expected_keys) + else: + standard_keys = {"type", "base64", "id", "status"} + tool_output = next( + block + for block in ai_message2.content + if isinstance(block, dict) and block["type"] == "image" + ) + assert set(standard_keys).issubset(tool_output.keys()) + + +@pytest.mark.default_cassette("test_image_generation_multi_turn.yaml.gz") +@pytest.mark.vcr +def test_image_generation_multi_turn_v1() -> None: + """Test multi-turn editing of image generation by passing in history.""" + # Test multi-turn + llm = ChatOpenAIV1(model="gpt-4.1", use_responses_api=True) + # Test invocation + tool = { + "type": "image_generation", + "quality": "low", + "output_format": "jpeg", + "output_compression": 100, + "size": "1024x1024", + } + llm_with_tools = llm.bind_tools([tool]) + + chat_history: list[MessageLikeRepresentation] = [ + {"role": "user", "content": "Draw a random short word in green font."} + ] + ai_message = llm_with_tools.invoke(chat_history) + assert isinstance(ai_message, AIMessageV1) + _check_response(ai_message, "v1") + expected_keys = { + # Standard + "type", + "base64", + "mime_type", "id", + # OpenAI-specific "background", "output_format", "quality", - "result", "revised_prompt", "size", "status", - "type", } - assert set(tool_output.keys()).issubset(expected_keys) + standard_keys = {"type", "base64", "id", "status"} + tool_output = next( + block + for block in ai_message.content + if isinstance(block, dict) and block["type"] == "image" + ) + assert set(standard_keys).issubset(tool_output.keys()) chat_history.extend( [ @@ -669,9 +1121,15 @@ def test_image_generation_multi_turn() -> None: ) ai_message2 = llm_with_tools.invoke(chat_history) - _check_response(ai_message2) - tool_output2 = ai_message2.additional_kwargs["tool_outputs"][0] - assert set(tool_output2.keys()).issubset(expected_keys) + assert isinstance(ai_message2, AIMessageV1) + _check_response(ai_message2, "v1") + + tool_output = next( + block + for block in ai_message2.content + if isinstance(block, dict) and block["type"] == "image" + ) + assert set(expected_keys).issubset(tool_output.keys()) @pytest.mark.xfail( From 3ae37b5987b9781d84c8bf1f8da01b779fd437ab Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Wed, 13 Aug 2025 11:12:46 -0400 Subject: [PATCH 14/73] openai: integration tests pass --- .../langchain_openai/chat_models/_compat.py | 6 +- .../langchain_openai/chat_models/base.py | 55 ++- .../chat_models/test_responses_api.py | 342 +++++------------- 3 files changed, 145 insertions(+), 258 deletions(-) diff --git a/libs/partners/openai/langchain_openai/chat_models/_compat.py b/libs/partners/openai/langchain_openai/chat_models/_compat.py index f3431f1651499..0409c82c943bb 100644 --- a/libs/partners/openai/langchain_openai/chat_models/_compat.py +++ b/libs/partners/openai/langchain_openai/chat_models/_compat.py @@ -432,7 +432,9 @@ def _iter_blocks() -> Iterable[types.ContentBlock]: "size", ): if extra_key in block: - new_block[extra_key] = block[extra_key] + if "extras" not in new_block: 
+ new_block["extras"] = {} + new_block["extras"][extra_key] = block[extra_key] yield cast(types.ImageContentBlock, new_block) elif block_type == "function_call": @@ -718,6 +720,8 @@ def _convert_from_v1_to_responses( for extra_key in ("id", "status"): if extra_key in block: new_block[extra_key] = block[extra_key] # type: ignore[typeddict-item] + elif extra_key in block.get("extras", {}): + new_block[extra_key] = block["extras"][extra_key] new_content.append(new_block) elif block["type"] == "non_standard" and "value" in block: new_content.append(block["value"]) diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index 552d45e41eec3..d75a48a0104a6 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -207,7 +207,7 @@ def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage: return ChatMessage(content=_dict.get("content", ""), role=role, id=id_) # type: ignore[arg-type] -def _format_message_content(content: Any) -> Any: +def _format_message_content(content: Any, responses_ai_msg: bool = False) -> Any: """Format message content.""" if content and isinstance(content, list): formatted_content = [] @@ -219,7 +219,13 @@ def _format_message_content(content: Any) -> Any: and block["type"] in ("tool_use", "thinking", "reasoning_content") ): continue - elif isinstance(block, dict) and is_data_content_block(block): + elif ( + isinstance(block, dict) + and is_data_content_block(block) + # Responses API messages handled separately in _compat (parsed into + # image generation calls) + and not responses_ai_msg + ): formatted_content.append(convert_to_openai_data_block(block)) # Anthropic image blocks elif ( @@ -252,7 +258,9 @@ def _format_message_content(content: Any) -> Any: return formatted_content -def _convert_message_to_dict(message: BaseMessage) -> dict: +def _convert_message_to_dict( + message: BaseMessage, responses_ai_msg: bool = False +) -> dict: """Convert a LangChain message to a dictionary. Args: @@ -261,7 +269,11 @@ def _convert_message_to_dict(message: BaseMessage) -> dict: Returns: The dictionary. 
""" - message_dict: dict[str, Any] = {"content": _format_message_content(message.content)} + message_dict: dict[str, Any] = { + "content": _format_message_content( + message.content, responses_ai_msg=responses_ai_msg + ) + } if (name := message.name or message.additional_kwargs.get("name")) is not None: message_dict["name"] = name @@ -296,15 +308,25 @@ def _convert_message_to_dict(message: BaseMessage) -> dict: if "function_call" in message_dict or "tool_calls" in message_dict: message_dict["content"] = message_dict["content"] or None - if "audio" in message.additional_kwargs: - # openai doesn't support passing the data back - only the id - # https://platform.openai.com/docs/guides/audio/multi-turn-conversations + audio: Optional[dict[str, Any]] = None + for block in message.content: + if ( + isinstance(block, dict) + and block.get("type") == "audio" + and (id_ := block.get("id")) + and not responses_ai_msg + ): + # openai doesn't support passing the data back - only the id + # https://platform.openai.com/docs/guides/audio/multi-turn-conversations + audio = {"id": id_} + if not audio and "audio" in message.additional_kwargs: raw_audio = message.additional_kwargs["audio"] audio = ( {"id": message.additional_kwargs["audio"]["id"]} if "id" in raw_audio else raw_audio ) + if audio: message_dict["audio"] = audio elif isinstance(message, SystemMessage): message_dict["role"] = message.additional_kwargs.get( @@ -3694,11 +3716,20 @@ def _construct_responses_api_input(messages: Sequence[BaseMessage]) -> list: for lc_msg in messages: if isinstance(lc_msg, AIMessage): lc_msg = _convert_from_v03_ai_message(lc_msg) - msg = _convert_message_to_dict(lc_msg) - if isinstance(lc_msg, AIMessage) and isinstance(msg.get("content"), list): - msg["content"] = _convert_from_v1_to_responses( - msg["content"], lc_msg.tool_calls - ) + msg = _convert_message_to_dict(lc_msg, responses_ai_msg=True) + if isinstance(msg.get("content"), list) and all( + isinstance(block, dict) for block in msg["content"] + ): + msg["content"] = _convert_from_v1_to_responses( + msg["content"], lc_msg.tool_calls + ) + else: + msg = _convert_message_to_dict(lc_msg) + # Get content from non-standard content blocks + if isinstance(msg["content"], list): + for i, block in enumerate(msg["content"]): + if isinstance(block, dict) and block.get("type") == "non_standard": + msg["content"][i] = block["value"] # "name" parameter unsupported if "name" in msg: msg.pop("name") diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py index 5906a837e8713..f98940f78c0f1 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py @@ -2,7 +2,7 @@ import json import os -from typing import Annotated, Any, Literal, Optional, Union, cast +from typing import Annotated, Any, Literal, Optional, cast import openai import pytest @@ -14,27 +14,16 @@ HumanMessage, MessageLikeRepresentation, ) -from langchain_core.v1.messages import AIMessage as AIMessageV1 -from langchain_core.v1.messages import AIMessageChunk as AIMessageChunkV1 -from langchain_core.v1.messages import HumanMessage as HumanMessageV1 from pydantic import BaseModel from typing_extensions import TypedDict from langchain_openai import ChatOpenAI, custom_tool -from langchain_openai.v1 import ChatOpenAI as ChatOpenAIV1 MODEL_NAME = "gpt-4o-mini" -def _check_response( - response: 
Optional[Union[BaseMessage, AIMessageV1]], output_version: str -) -> None: - if output_version == "v1": - assert isinstance(response, AIMessageV1) or isinstance( - response, AIMessageChunkV1 - ) - else: - assert isinstance(response, AIMessage) +def _check_response(response: Optional[BaseMessage], output_version: str) -> None: + assert isinstance(response, AIMessage) assert isinstance(response.content, list) for block in response.content: assert isinstance(block, dict) @@ -56,11 +45,7 @@ def _check_response( if "url" in annotation: assert "start_index" in annotation assert "end_index" in annotation - - if output_version == "v1": - text_content = response.text - else: - text_content = response.text() # type: ignore[operator,misc] + text_content = response.text() # type: ignore[operator,misc] assert isinstance(text_content, str) assert text_content assert response.usage_metadata @@ -75,10 +60,7 @@ def _check_response( @pytest.mark.vcr @pytest.mark.parametrize("output_version", ["responses/v1", "v1"]) def test_web_search(output_version: Literal["responses/v1", "v1"]) -> None: - if output_version == "v1": - llm = ChatOpenAIV1(model=MODEL_NAME) - else: - llm = ChatOpenAI(model=MODEL_NAME, output_version=output_version) # type: ignore[assignment] + llm = ChatOpenAI(model=MODEL_NAME, output_version=output_version) # type: ignore[assignment] first_response = llm.invoke( "What was a positive news story from today?", tools=[{"type": "web_search_preview"}], @@ -86,22 +68,13 @@ def test_web_search(output_version: Literal["responses/v1", "v1"]) -> None: _check_response(first_response, output_version) # Test streaming - if isinstance(llm, ChatOpenAIV1): - full: Optional[AIMessageChunkV1] = None - for chunk in llm.stream( - "What was a positive news story from today?", - tools=[{"type": "web_search_preview"}], - ): - assert isinstance(chunk, AIMessageChunkV1) - full = chunk if full is None else full + chunk - else: - full: Optional[BaseMessageChunk] = None # type: ignore[no-redef] - for chunk in llm.stream( - "What was a positive news story from today?", - tools=[{"type": "web_search_preview"}], - ): - assert isinstance(chunk, AIMessageChunk) - full = chunk if full is None else full + chunk + full: Optional[BaseMessageChunk] = None # type: ignore[no-redef] + for chunk in llm.stream( + "What was a positive news story from today?", + tools=[{"type": "web_search_preview"}], + ): + assert isinstance(chunk, AIMessageChunk) + full = chunk if full is None else full + chunk _check_response(full, output_version) # Use OpenAI's stateful API @@ -168,8 +141,8 @@ async def test_web_search_async() -> None: @pytest.mark.default_cassette("test_function_calling.yaml.gz") @pytest.mark.vcr -@pytest.mark.parametrize("output_version", ["v0", "responses/v1"]) -def test_function_calling(output_version: Literal["v0", "responses/v1"]) -> None: +@pytest.mark.parametrize("output_version", ["v0", "responses/v1", "v1"]) +def test_function_calling(output_version: Literal["v0", "responses/v1", "v1"]) -> None: def multiply(x: int, y: int) -> int: """return x * y""" return x * y @@ -193,32 +166,6 @@ def multiply(x: int, y: int) -> int: _check_response(response, output_version) -@pytest.mark.default_cassette("test_function_calling.yaml.gz") -@pytest.mark.vcr -def test_function_calling_v1() -> None: - def multiply(x: int, y: int) -> int: - """return x * y""" - return x * y - - llm = ChatOpenAIV1(model=MODEL_NAME) - bound_llm = llm.bind_tools([multiply, {"type": "web_search_preview"}]) - ai_msg = bound_llm.invoke("whats 5 * 4") - assert 
len(ai_msg.tool_calls) == 1 - assert ai_msg.tool_calls[0]["name"] == "multiply" - assert set(ai_msg.tool_calls[0]["args"]) == {"x", "y"} - - full: Any = None - for chunk in bound_llm.stream("whats 5 * 4"): - assert isinstance(chunk, AIMessageChunkV1) - full = chunk if full is None else full + chunk - assert len(full.tool_calls) == 1 - assert full.tool_calls[0]["name"] == "multiply" - assert set(full.tool_calls[0]["args"]) == {"x", "y"} - - response = bound_llm.invoke("What was a positive news story from today?") - _check_response(response, "v1") - - class Foo(BaseModel): response: str @@ -229,8 +176,10 @@ class FooDict(TypedDict): @pytest.mark.default_cassette("test_parsed_pydantic_schema.yaml.gz") @pytest.mark.vcr -@pytest.mark.parametrize("output_version", ["v0", "responses/v1"]) -def test_parsed_pydantic_schema(output_version: Literal["v0", "responses/v1"]) -> None: +@pytest.mark.parametrize("output_version", ["v0", "responses/v1", "v1"]) +def test_parsed_pydantic_schema( + output_version: Literal["v0", "responses/v1", "v1"], +) -> None: llm = ChatOpenAI( model=MODEL_NAME, use_responses_api=True, output_version=output_version ) @@ -250,30 +199,6 @@ def test_parsed_pydantic_schema(output_version: Literal["v0", "responses/v1"]) - assert parsed.response -@pytest.mark.default_cassette("test_parsed_pydantic_schema.yaml.gz") -@pytest.mark.vcr -def test_parsed_pydantic_schema_v1() -> None: - llm = ChatOpenAIV1(model=MODEL_NAME, use_responses_api=True) - response = llm.invoke("how are ya", response_format=Foo) - assert response.text - parsed = Foo(**json.loads(response.text)) - assert parsed == response.parsed - assert parsed.response - - # Test stream - full: Optional[AIMessageChunkV1] = None - chunks = [] - for chunk in llm.stream("how are ya", response_format=Foo): - assert isinstance(chunk, AIMessageChunkV1) - full = chunk if full is None else full + chunk - chunks.append(chunk) - assert isinstance(full, AIMessageChunkV1) - assert full.text - parsed = Foo(**json.loads(full.text)) - assert parsed == full.parsed - assert parsed.response - - async def test_parsed_pydantic_schema_async() -> None: llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) response = await llm.ainvoke("how are ya", response_format=Foo) @@ -379,8 +304,8 @@ def multiply(x: int, y: int) -> int: @pytest.mark.default_cassette("test_reasoning.yaml.gz") @pytest.mark.vcr -@pytest.mark.parametrize("output_version", ["v0", "responses/v1"]) -def test_reasoning(output_version: Literal["v0", "responses/v1"]) -> None: +@pytest.mark.parametrize("output_version", ["v0", "responses/v1", "v1"]) +def test_reasoning(output_version: Literal["v0", "responses/v1", "v1"]) -> None: llm = ChatOpenAI( model="o4-mini", use_responses_api=True, output_version=output_version ) @@ -405,26 +330,6 @@ def test_reasoning(output_version: Literal["v0", "responses/v1"]) -> None: assert block_types == ["reasoning", "text"] -@pytest.mark.default_cassette("test_reasoning.yaml.gz") -@pytest.mark.vcr -def test_reasoning_v1() -> None: - llm = ChatOpenAIV1(model="o4-mini", use_responses_api=True) - response = llm.invoke("Hello", reasoning={"effort": "low"}) - assert isinstance(response, AIMessageV1) - - # Test init params + streaming - llm = ChatOpenAIV1(model="o4-mini", reasoning={"effort": "low"}) - full: Optional[AIMessageChunkV1] = None - for chunk in llm.stream("Hello"): - assert isinstance(chunk, AIMessageChunkV1) - full = chunk if full is None else full + chunk - assert isinstance(full, AIMessageChunkV1) - - for msg in [response, full]: - block_types 
= [block["type"] for block in msg.content] - assert block_types == ["reasoning", "text"] - - def test_stateful_api() -> None: llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) response = llm.invoke("how are you, my name is Bobo") @@ -483,9 +388,9 @@ def test_file_search() -> None: @pytest.mark.default_cassette("test_stream_reasoning_summary.yaml.gz") @pytest.mark.vcr -@pytest.mark.parametrize("output_version", ["v0", "responses/v1"]) +@pytest.mark.parametrize("output_version", ["v0", "responses/v1", "v1"]) def test_stream_reasoning_summary( - output_version: Literal["v0", "responses/v1"], + output_version: Literal["v0", "responses/v1", "v1"], ) -> None: llm = ChatOpenAI( model="o4-mini", @@ -512,8 +417,7 @@ def test_stream_reasoning_summary( assert isinstance(block["type"], str) assert isinstance(block["text"], str) assert block["text"] - else: - # output_version == "responses/v1" + elif output_version == "responses/v1": reasoning = next( block for block in response_1.content @@ -527,6 +431,18 @@ def test_stream_reasoning_summary( assert isinstance(block["type"], str) assert isinstance(block["text"], str) assert block["text"] + else: + # v1 + total_reasoning_blocks = 0 + for block in response_1.content: + if block["type"] == "reasoning": + total_reasoning_blocks += 1 + assert isinstance(block["id"], str) and block["id"].startswith("rs_") + assert isinstance(block["reasoning"], str) + assert isinstance(block["index"], int) + assert ( + total_reasoning_blocks > 1 + ) # This query typically generates multiple reasoning blocks # Check we can pass back summaries message_2 = {"role": "user", "content": "Thank you."} @@ -534,45 +450,10 @@ def test_stream_reasoning_summary( assert isinstance(response_2, AIMessage) -@pytest.mark.default_cassette("test_stream_reasoning_summary.yaml.gz") -@pytest.mark.vcr -def test_stream_reasoning_summary_v1() -> None: - llm = ChatOpenAIV1( - model="o4-mini", - # Routes to Responses API if `reasoning` is set. 
- reasoning={"effort": "medium", "summary": "auto"}, - ) - message_1 = { - "role": "user", - "content": "What was the third tallest buliding in the year 2000?", - } - response_1: Optional[AIMessageChunkV1] = None - for chunk in llm.stream([message_1]): - assert isinstance(chunk, AIMessageChunkV1) - response_1 = chunk if response_1 is None else response_1 + chunk - assert isinstance(response_1, AIMessageChunkV1) - - total_reasoning_blocks = 0 - for block in response_1.content: - if block["type"] == "reasoning": - total_reasoning_blocks += 1 - assert isinstance(block["id"], str) and block["id"].startswith("rs_") - assert isinstance(block["reasoning"], str) - assert isinstance(block["index"], int) - assert ( - total_reasoning_blocks > 1 - ) # This query typically generates multiple reasoning blocks - - # Check we can pass back summaries - message_2 = {"role": "user", "content": "Thank you."} - response_2 = llm.invoke([message_1, response_1, message_2]) - assert isinstance(response_2, AIMessageV1) - - @pytest.mark.default_cassette("test_code_interpreter.yaml.gz") @pytest.mark.vcr -@pytest.mark.parametrize("output_version", ["v0", "responses/v1"]) -def test_code_interpreter(output_version: Literal["v0", "responses/v1"]) -> None: +@pytest.mark.parametrize("output_version", ["v0", "responses/v1", "v1"]) +def test_code_interpreter(output_version: Literal["v0", "responses/v1", "v1"]) -> None: llm = ChatOpenAI( model="o4-mini", use_responses_api=True, output_version=output_version ) @@ -592,13 +473,28 @@ def test_code_interpreter(output_version: Literal["v0", "responses/v1"]) -> None for item in response.additional_kwargs["tool_outputs"] if item["type"] == "code_interpreter_call" ] - else: - # responses/v1 + assert len(tool_outputs) == 1 + elif output_version == "responses/v1": tool_outputs = [ item for item in response.content if isinstance(item, dict) and item["type"] == "code_interpreter_call" ] + assert len(tool_outputs) == 1 + else: + # v1 + tool_outputs = [ + item + for item in response.content_blocks + if item["type"] == "code_interpreter_call" + ] + code_interpreter_result = next( + item + for item in response.content_blocks + if item["type"] == "code_interpreter_result" + ) + assert tool_outputs + assert code_interpreter_result assert len(tool_outputs) == 1 # Test streaming @@ -619,66 +515,28 @@ def test_code_interpreter(output_version: Literal["v0", "responses/v1"]) -> None for item in response.additional_kwargs["tool_outputs"] if item["type"] == "code_interpreter_call" ] - else: - # responses/v1 + assert tool_outputs + elif output_version == "responses/v1": tool_outputs = [ item for item in response.content if isinstance(item, dict) and item["type"] == "code_interpreter_call" ] - assert tool_outputs - - # Test we can pass back in - next_message = {"role": "user", "content": "Please add more comments to the code."} - _ = llm_with_tools.invoke([input_message, full, next_message]) - - -@pytest.mark.default_cassette("test_code_interpreter.yaml.gz") -@pytest.mark.vcr -def test_code_interpreter_v1() -> None: - llm = ChatOpenAIV1(model="o4-mini", use_responses_api=True) - llm_with_tools = llm.bind_tools( - [{"type": "code_interpreter", "container": {"type": "auto"}}] - ) - input_message = { - "role": "user", - "content": "Write and run code to answer the question: what is 3^3?", - } - response = llm_with_tools.invoke([input_message]) - assert isinstance(response, AIMessageV1) - _check_response(response, "v1") - - tool_outputs = [ - item for item in response.content if item["type"] == 
"code_interpreter_call" - ] - code_interpreter_result = next( - item for item in response.content if item["type"] == "code_interpreter_result" - ) - assert tool_outputs - assert code_interpreter_result - assert len(tool_outputs) == 1 - - # Test streaming - # Use same container - container_id = tool_outputs[0]["container_id"] # type: ignore[typeddict-item] - llm_with_tools = llm.bind_tools( - [{"type": "code_interpreter", "container": container_id}] - ) - - full: Optional[AIMessageChunkV1] = None - for chunk in llm_with_tools.stream([input_message]): - assert isinstance(chunk, AIMessageChunkV1) - full = chunk if full is None else full + chunk - assert isinstance(full, AIMessageChunkV1) - code_interpreter_call = next( - item for item in full.content if item["type"] == "code_interpreter_call" - ) - code_interpreter_result = next( - item for item in full.content if item["type"] == "code_interpreter_result" - ) - assert code_interpreter_call - assert code_interpreter_result - assert tool_outputs + assert tool_outputs + else: + # v1 + code_interpreter_call = next( + item + for item in full.content_blocks + if item["type"] == "code_interpreter_call" + ) + code_interpreter_result = next( + item + for item in full.content_blocks + if item["type"] == "code_interpreter_result" + ) + assert code_interpreter_call + assert code_interpreter_result # Test we can pass back in next_message = {"role": "user", "content": "Please add more comments to the code."} @@ -776,8 +634,11 @@ def test_mcp_builtin_zdr() -> None: @pytest.mark.default_cassette("test_mcp_builtin_zdr.yaml.gz") @pytest.mark.vcr def test_mcp_builtin_zdr_v1() -> None: - llm = ChatOpenAIV1( - model="o4-mini", store=False, include=["reasoning.encrypted_content"] + llm = ChatOpenAI( + model="o4-mini", + output_version="v1", + store=False, + include=["reasoning.encrypted_content"], ) llm_with_tools = llm.bind_tools( @@ -797,15 +658,15 @@ def test_mcp_builtin_zdr_v1() -> None: "support?" 
), } - full: Optional[AIMessageChunkV1] = None + full: Optional[BaseMessageChunk] = None for chunk in llm_with_tools.stream([input_message]): - assert isinstance(chunk, AIMessageChunkV1) + assert isinstance(chunk, AIMessageChunk) full = chunk if full is None else full + chunk - assert isinstance(full, AIMessageChunkV1) + assert isinstance(full, AIMessageChunk) assert all(isinstance(block, dict) for block in full.content) - approval_message = HumanMessageV1( + approval_message = HumanMessage( [ { "type": "non_standard", @@ -815,7 +676,7 @@ def test_mcp_builtin_zdr_v1() -> None: "approval_request_id": block["value"]["id"], # type: ignore[index] }, } - for block in full.content + for block in full.content_blocks if block["type"] == "non_standard" and block["value"]["type"] == "mcp_approval_request" # type: ignore[index] ] @@ -905,7 +766,7 @@ def test_image_generation_streaming( @pytest.mark.vcr def test_image_generation_streaming_v1() -> None: """Test image generation streaming.""" - llm = ChatOpenAIV1(model="gpt-4.1", use_responses_api=True) + llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True, output_version="v1") tool = { "type": "image_generation", "quality": "low", @@ -914,14 +775,8 @@ def test_image_generation_streaming_v1() -> None: "size": "1024x1024", } - expected_keys = { - # Standard - "type", - "base64", - "mime_type", - "id", - "index", - # OpenAI-specific + standard_keys = {"type", "base64", "mime_type", "id", "index"} + extra_keys = { "background", "output_format", "quality", @@ -930,18 +785,19 @@ def test_image_generation_streaming_v1() -> None: "status", } - full: Optional[AIMessageChunkV1] = None + full: Optional[BaseMessageChunk] = None for chunk in llm.stream("Draw a random short word in green font.", tools=[tool]): - assert isinstance(chunk, AIMessageChunkV1) + assert isinstance(chunk, AIMessageChunk) full = chunk if full is None else full + chunk - complete_ai_message = cast(AIMessageChunkV1, full) + complete_ai_message = cast(AIMessageChunk, full) tool_output = next( block for block in complete_ai_message.content if isinstance(block, dict) and block["type"] == "image" ) - assert set(expected_keys).issubset(tool_output.keys()) + assert set(standard_keys).issubset(tool_output.keys()) + assert set(extra_keys).issubset(tool_output["extras"].keys()) @pytest.mark.default_cassette("test_image_generation_multi_turn.yaml.gz") @@ -1064,7 +920,7 @@ def test_image_generation_multi_turn( def test_image_generation_multi_turn_v1() -> None: """Test multi-turn editing of image generation by passing in history.""" # Test multi-turn - llm = ChatOpenAIV1(model="gpt-4.1", use_responses_api=True) + llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True, output_version="v1") # Test invocation tool = { "type": "image_generation", @@ -1079,16 +935,11 @@ def test_image_generation_multi_turn_v1() -> None: {"role": "user", "content": "Draw a random short word in green font."} ] ai_message = llm_with_tools.invoke(chat_history) - assert isinstance(ai_message, AIMessageV1) + assert isinstance(ai_message, AIMessage) _check_response(ai_message, "v1") - expected_keys = { - # Standard - "type", - "base64", - "mime_type", - "id", - # OpenAI-specific + standard_keys = {"type", "base64", "mime_type", "id"} + extra_keys = { "background", "output_format", "quality", @@ -1097,13 +948,13 @@ def test_image_generation_multi_turn_v1() -> None: "status", } - standard_keys = {"type", "base64", "id", "status"} tool_output = next( block for block in ai_message.content if isinstance(block, dict) and 
block["type"] == "image" ) assert set(standard_keys).issubset(tool_output.keys()) + assert set(extra_keys).issubset(tool_output["extras"].keys()) chat_history.extend( [ @@ -1121,7 +972,7 @@ def test_image_generation_multi_turn_v1() -> None: ) ai_message2 = llm_with_tools.invoke(chat_history) - assert isinstance(ai_message2, AIMessageV1) + assert isinstance(ai_message2, AIMessage) _check_response(ai_message2, "v1") tool_output = next( @@ -1129,7 +980,8 @@ def test_image_generation_multi_turn_v1() -> None: for block in ai_message2.content if isinstance(block, dict) and block["type"] == "image" ) - assert set(expected_keys).issubset(tool_output.keys()) + assert set(standard_keys).issubset(tool_output.keys()) + assert set(extra_keys).issubset(tool_output["extras"].keys()) @pytest.mark.xfail( From 2f604eb9a0fd8c51706bed6c9e7c46972bd76e43 Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Wed, 13 Aug 2025 11:23:54 -0400 Subject: [PATCH 15/73] openai: carry over refusals fix --- .../openai/langchain_openai/chat_models/base.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index d75a48a0104a6..303d1841d62b4 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -3441,6 +3441,20 @@ def _oai_structured_outputs_parser( return parsed elif ai_msg.additional_kwargs.get("refusal"): raise OpenAIRefusalError(ai_msg.additional_kwargs["refusal"]) + elif any( + isinstance(block, dict) + and block.get("type") == "non_standard" + and "refusal" in block["value"] + for block in ai_msg.content + ): + refusal = next( + block["value"]["refusal"] + for block in ai_msg.content + if isinstance(block, dict) + and block["type"] == "non_standard" + and "refusal" in block["value"] + ) + raise OpenAIRefusalError(refusal) elif ai_msg.tool_calls: return None else: From 153db48c924a1d6f210580d8df6d8e58b4d89e3b Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Wed, 13 Aug 2025 15:32:02 -0400 Subject: [PATCH 16/73] openai: misc fixes for computer calls and custom tools --- .../langchain_openai/chat_models/base.py | 90 ++++++++++++++----- .../chat_models/test_responses_api.py | 10 ++- 2 files changed, 73 insertions(+), 27 deletions(-) diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index 72f9e179c421c..d8fca0513b17e 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -1316,6 +1316,7 @@ def _create_chat_result( generations.append(gen) llm_output = { "token_usage": token_usage, + "model_provider": "openai", "model_name": response_dict.get("model", self.model_name), "system_fingerprint": response_dict.get("system_fingerprint", ""), } @@ -1331,7 +1332,18 @@ def _create_chat_result( if hasattr(message, "parsed"): generations[0].message.additional_kwargs["parsed"] = message.parsed if hasattr(message, "refusal"): - generations[0].message.additional_kwargs["refusal"] = message.refusal + if self.output_version in ("v0", "responses/v1"): + generations[0].message.additional_kwargs["refusal"] = ( + message.refusal + ) + elif self.output_version == "v1": + if isinstance(generations[0].message.content, list): + generations[0].message.content.append( + { + "type": "non_standard", + "value": {"refusal": message.refusal}, + } + ) if self.output_version == "v1": _ 
= llm_output.pop("token_usage", None) @@ -3571,7 +3583,7 @@ def _get_last_messages( msg = messages[i] if isinstance(msg, AIMessage): response_id = msg.response_metadata.get("id") - if response_id: + if response_id and response_id.startswith("resp_"): return messages[i + 1 :], response_id else: return messages, None @@ -3680,23 +3692,45 @@ def _construct_responses_api_payload( return payload -def _make_computer_call_output_from_message(message: ToolMessage) -> dict: - computer_call_output: dict = { - "call_id": message.tool_call_id, - "type": "computer_call_output", - } +def _make_computer_call_output_from_message( + message: ToolMessage, +) -> Optional[dict[str, Any]]: + computer_call_output: Optional[dict[str, Any]] = None if isinstance(message.content, list): - # Use first input_image block - output = next( - block - for block in message.content - if cast(dict, block)["type"] == "input_image" - ) + for block in message.content: + if ( + message.additional_kwargs.get("type") == "computer_call_output" + and isinstance(block, dict) + and block.get("type") == "input_image" + ): + # Use first input_image block + computer_call_output = { + "call_id": message.tool_call_id, + "type": "computer_call_output", + "output": block, + } + break + elif ( + isinstance(block, dict) + and block.get("type") == "non_standard" + and block.get("value", {}).get("type") == "computer_call_output" + ): + computer_call_output = block["value"] + break + else: + pass else: - # string, assume image_url - output = {"type": "input_image", "image_url": message.content} - computer_call_output["output"] = output - if "acknowledged_safety_checks" in message.additional_kwargs: + if message.additional_kwargs.get("type") == "computer_call_output": + # string, assume image_url + computer_call_output = { + "call_id": message.tool_call_id, + "type": "computer_call_output", + "output": {"type": "input_image", "image_url": message.content}, + } + if ( + computer_call_output is not None + and "acknowledged_safety_checks" in message.additional_kwargs + ): computer_call_output["acknowledged_safety_checks"] = message.additional_kwargs[ "acknowledged_safety_checks" ] @@ -3713,6 +3747,15 @@ def _make_custom_tool_output_from_message(message: ToolMessage) -> Optional[dict "output": block.get("output") or "", } break + elif ( + isinstance(block, dict) + and block.get("type") == "non_standard" + and block.get("value", {}).get("type") == "custom_tool_call_output" + ): + custom_tool_output = block["value"] + break + else: + pass return custom_tool_output @@ -3756,14 +3799,14 @@ def _construct_responses_api_input(messages: Sequence[BaseMessage]) -> list: msg.pop("name") if msg["role"] == "tool": tool_output = msg["content"] + computer_call_output = _make_computer_call_output_from_message( + cast(ToolMessage, lc_msg) + ) custom_tool_output = _make_custom_tool_output_from_message(lc_msg) # type: ignore[arg-type] - if custom_tool_output: - input_.append(custom_tool_output) - elif lc_msg.additional_kwargs.get("type") == "computer_call_output": - computer_call_output = _make_computer_call_output_from_message( - cast(ToolMessage, lc_msg) - ) + if computer_call_output: input_.append(computer_call_output) + elif custom_tool_output: + input_.append(custom_tool_output) else: if not isinstance(tool_output, str): tool_output = _stringify(tool_output) @@ -3939,6 +3982,7 @@ def _construct_lc_result_from_responses_api( if metadata: response_metadata.update(metadata) # for compatibility with chat completion calls. 
+ response_metadata["model_provider"] = "openai" response_metadata["model_name"] = response_metadata.get("model") if response.usage: usage_metadata = _create_usage_metadata_responses(response.usage.model_dump()) diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py index 44a2edd158782..4ddf373ec78f1 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py @@ -436,7 +436,7 @@ def test_stream_reasoning_summary( else: # v1 total_reasoning_blocks = 0 - for block in response_1.content: + for block in response_1.content_blocks: if block["type"] == "reasoning": total_reasoning_blocks += 1 assert isinstance(block["id"], str) and block["id"].startswith("rs_") @@ -999,14 +999,16 @@ def test_verbosity_parameter() -> None: assert response.content -@pytest.mark.vcr() -def test_custom_tool() -> None: +@pytest.mark.default_cassette("test_custom_tool.yaml.gz") +@pytest.mark.vcr +@pytest.mark.parametrize("output_version", ["responses/v1", "v1"]) +def test_custom_tool(output_version: Literal["responses/v1", "v1"]) -> None: @custom_tool def execute_code(code: str) -> str: """Execute python code.""" return "27" - llm = ChatOpenAI(model="gpt-5", output_version="responses/v1").bind_tools( + llm = ChatOpenAI(model="gpt-5", output_version=output_version).bind_tools( [execute_code] ) From 0aac20e65581d75b79e6c7418cd27b727f1113c0 Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Thu, 14 Aug 2025 09:55:20 -0400 Subject: [PATCH 17/73] openai: tool calls in progress --- .../langchain_openai/chat_models/_compat.py | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/libs/partners/openai/langchain_openai/chat_models/_compat.py b/libs/partners/openai/langchain_openai/chat_models/_compat.py index 0409c82c943bb..d47aa735e15b5 100644 --- a/libs/partners/openai/langchain_openai/chat_models/_compat.py +++ b/libs/partners/openai/langchain_openai/chat_models/_compat.py @@ -271,8 +271,7 @@ def _convert_to_v1_from_chat_completions(message: AIMessage) -> AIMessage: message.content = [] for tool_call in message.tool_calls: - if id_ := tool_call.get("id"): - message.content.append({"type": "tool_call", "id": id_}) + message.content.append(cast(dict, tool_call)) if "tool_calls" in message.additional_kwargs: _ = message.additional_kwargs.pop("tool_calls") @@ -284,8 +283,23 @@ def _convert_to_v1_from_chat_completions(message: AIMessage) -> AIMessage: def _convert_to_v1_from_chat_completions_chunk(chunk: AIMessageChunk) -> AIMessageChunk: - result = _convert_to_v1_from_chat_completions(cast(AIMessage, chunk)) - return cast(AIMessageChunk, result) + """Mutate a Chat Completions chunk to v1 format.""" + if isinstance(chunk.content, str): + if chunk.content: + chunk.content = [{"type": "text", "text": chunk.content}] + else: + chunk.content = [] + + for tool_call_chunk in chunk.tool_call_chunks: + chunk.content.append(cast(dict, tool_call_chunk)) + + if "tool_calls" in chunk.additional_kwargs: + _ = chunk.additional_kwargs.pop("tool_calls") + + if "token_usage" in chunk.response_metadata: + _ = chunk.response_metadata.pop("token_usage") + + return chunk def _convert_from_v1_to_chat_completions(message: AIMessage) -> AIMessage: From 624300cefa92a91217a8152d802a88d129afcd99 Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Thu, 14 Aug 2025 10:06:33 -0400 Subject: [PATCH 
18/73] core: populate tool_call_chunks in content_blocks --- libs/core/langchain_core/messages/ai.py | 71 ++++++++++++++----- .../core/tests/unit_tests/messages/test_ai.py | 26 +++++++ 2 files changed, 78 insertions(+), 19 deletions(-) diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index b38a7fa1a4924..b383685c6c75d 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -230,25 +230,26 @@ def content_blocks(self) -> list[types.ContentBlock]: """Return content blocks of the message.""" blocks = super().content_blocks - # Add from tool_calls if missing from content - content_tool_call_ids = { - block.get("id") - for block in self.content - if isinstance(block, dict) and block.get("type") == "tool_call" - } - for tool_call in self.tool_calls: - if (id_ := tool_call.get("id")) and id_ not in content_tool_call_ids: - tool_call_block: types.ToolCall = { - "type": "tool_call", - "id": id_, - "name": tool_call["name"], - "args": tool_call["args"], - } - if "index" in tool_call: - tool_call_block["index"] = tool_call["index"] - if "extras" in tool_call: - tool_call_block["extras"] = tool_call["extras"] - blocks.append(tool_call_block) + if self.tool_calls: + # Add from tool_calls if missing from content + content_tool_call_ids = { + block.get("id") + for block in self.content + if isinstance(block, dict) and block.get("type") == "tool_call" + } + for tool_call in self.tool_calls: + if (id_ := tool_call.get("id")) and id_ not in content_tool_call_ids: + tool_call_block: types.ToolCall = { + "type": "tool_call", + "id": id_, + "name": tool_call["name"], + "args": tool_call["args"], + } + if "index" in tool_call: + tool_call_block["index"] = tool_call["index"] + if "extras" in tool_call: + tool_call_block["extras"] = tool_call["extras"] + blocks.append(tool_call_block) return blocks @@ -361,6 +362,38 @@ def lc_attributes(self) -> dict: "invalid_tool_calls": self.invalid_tool_calls, } + @property + def content_blocks(self) -> list[types.ContentBlock]: + """Return content blocks of the message.""" + blocks = super().content_blocks + + if self.tool_call_chunks: + blocks = [ + block + for block in blocks + if block["type"] not in ("tool_call", "invalid_tool_call") + ] + # Add from tool_call_chunks if missing from content + content_tool_call_ids = { + block.get("id") + for block in self.content + if isinstance(block, dict) and block.get("type") == "tool_call_chunk" + } + for chunk in self.tool_call_chunks: + if (id_ := chunk.get("id")) and id_ not in content_tool_call_ids: + tool_call_chunk_block: types.ToolCallChunk = { + "type": "tool_call_chunk", + "id": id_, + "name": chunk["name"], + "args": chunk["args"], + "index": chunk.get("index"), + } + if "extras" in chunk: + tool_call_chunk_block["extras"] = chunk["extras"] # type: ignore[typeddict-item] + blocks.append(tool_call_chunk_block) + + return blocks + @model_validator(mode="after") def init_tool_calls(self) -> Self: """Initialize tool calls from tool call chunks. 
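A rough sketch (not part of the patch) of the behavior this hunk enables, assuming a
build of ``langchain-core`` that includes it: tool-call deltas accumulated while
streaming now surface through ``content_blocks`` as ``tool_call_chunk`` blocks. The
tool name, arguments, and call id below are invented for illustration.

.. code-block:: python

    from langchain_core.messages import AIMessageChunk

    # First streamed delta carrying a partial tool call.
    left = AIMessageChunk(
        content="",
        tool_call_chunks=[
            {
                "type": "tool_call_chunk",
                "name": "multiply",  # hypothetical tool name
                "args": '{"x": 5',  # partial JSON arguments
                "id": "call_1",  # hypothetical call id
                "index": 0,
            }
        ],
    )
    # A later delta for the same call (same index), continuing the args string.
    right = AIMessageChunk(
        content="",
        tool_call_chunks=[
            {
                "type": "tool_call_chunk",
                "name": None,
                "args": ', "y": 4}',
                "id": None,
                "index": 0,
            }
        ],
    )

    merged = left + right
    # The in-progress call is exposed as a typed content block on the chunk.
    block = merged.content_blocks[0]
    assert block["type"] == "tool_call_chunk"
    assert block["args"] == '{"x": 5, "y": 4}'

Chunks with a matching ``index`` merge by concatenating their ``args`` strings, so the
assembled call is visible before the final message arrives.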
diff --git a/libs/core/tests/unit_tests/messages/test_ai.py b/libs/core/tests/unit_tests/messages/test_ai.py index a7225015c2cff..b3c0a4d84ea08 100644 --- a/libs/core/tests/unit_tests/messages/test_ai.py +++ b/libs/core/tests/unit_tests/messages/test_ai.py @@ -284,3 +284,29 @@ def test_content_blocks() -> None: {"type": "tool_call", "name": "foo", "args": {"a": "b"}, "id": "abc_123"}, missing_tool_call, ] + + # Chunks + message = AIMessageChunk( + content="", + tool_call_chunks=[ + { + "type": "tool_call_chunk", + "name": "foo", + "args": "", + "id": "abc_123", + "index": 0, + } + ], + ) + assert len(message.content_blocks) == 1 + assert message.content_blocks[0]["type"] == "tool_call_chunk" + assert message.content_blocks == [ + { + "type": "tool_call_chunk", + "name": "foo", + "args": "", + "id": "abc_123", + "index": 0, + } + ] + assert message.content == "" From 7e39cd18c55050bb80d7d2ff6419aa2b66bb836f Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Fri, 15 Aug 2025 14:30:32 -0400 Subject: [PATCH 19/73] feat: allow kwargs on content block factories (#32568) --- .../langchain_core/messages/content_blocks.py | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/libs/core/langchain_core/messages/content_blocks.py b/libs/core/langchain_core/messages/content_blocks.py index daf5112507406..656d47cdaa6e7 100644 --- a/libs/core/langchain_core/messages/content_blocks.py +++ b/libs/core/langchain_core/messages/content_blocks.py @@ -812,6 +812,9 @@ class NonStandardContentBlock(TypedDict): the adapter's job to parse that payload and emit the corresponding standard ``ReasoningContentBlock`` and ``ToolCallContentBlocks``. + Has no ``extras`` field, as provider-specific data should be included in the + ``value`` field. + .. note:: ``create_non_standard_block`` may also be used as a factory to create a ``NonStandardContentBlock``. Benefits include: @@ -1023,6 +1026,7 @@ def create_text_block( id: Optional[str] = None, annotations: Optional[list[Annotation]] = None, index: Optional[int] = None, + **kwargs: Any, ) -> TextContentBlock: """Create a ``TextContentBlock``. @@ -1049,6 +1053,11 @@ def create_text_block( block["annotations"] = annotations if index is not None: block["index"] = index + + extras = {k: v for k, v in kwargs.items() if v is not None} + if extras: + block["extras"] = extras + return block @@ -1060,6 +1069,7 @@ def create_image_block( mime_type: Optional[str] = None, id: Optional[str] = None, index: Optional[int] = None, + **kwargs: Any, ) -> ImageContentBlock: """Create an ``ImageContentBlock``. @@ -1100,6 +1110,10 @@ def create_image_block( if index is not None: block["index"] = index + extras = {k: v for k, v in kwargs.items() if v is not None} + if extras: + block["extras"] = extras + return block @@ -1111,6 +1125,7 @@ def create_video_block( mime_type: Optional[str] = None, id: Optional[str] = None, index: Optional[int] = None, + **kwargs: Any, ) -> VideoContentBlock: """Create a ``VideoContentBlock``. @@ -1155,6 +1170,10 @@ def create_video_block( if index is not None: block["index"] = index + extras = {k: v for k, v in kwargs.items() if v is not None} + if extras: + block["extras"] = extras + return block @@ -1166,6 +1185,7 @@ def create_audio_block( mime_type: Optional[str] = None, id: Optional[str] = None, index: Optional[int] = None, + **kwargs: Any, ) -> AudioContentBlock: """Create an ``AudioContentBlock``. 
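A brief sketch (not part of the patch) of how the ``**kwargs`` handling added to these
factories could be used, assuming this branch of ``langchain-core``. ``cache_control``
is an invented provider-specific key used purely for illustration, not a documented
parameter.

.. code-block:: python

    from langchain_core.messages.content_blocks import (
        create_image_block,
        create_text_block,
    )

    text = create_text_block("Describe the logo.")
    image = create_image_block(
        url="https://www.langchain.com/images/brand/langchain_logo_text_w_white.png",
        mime_type="image/png",
        # Unrecognized keyword arguments are collected into the block's `extras`.
        cache_control={"type": "ephemeral"},
    )

    assert image["extras"] == {"cache_control": {"type": "ephemeral"}}
    # Factories auto-generate an `lc_`-prefixed id when one is not supplied.
    assert text["id"].startswith("lc_")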
@@ -1210,6 +1230,10 @@ def create_audio_block( if index is not None: block["index"] = index + extras = {k: v for k, v in kwargs.items() if v is not None} + if extras: + block["extras"] = extras + return block @@ -1221,6 +1245,7 @@ def create_file_block( mime_type: Optional[str] = None, id: Optional[str] = None, index: Optional[int] = None, + **kwargs: Any, ) -> FileContentBlock: """Create a ``FileContentBlock``. @@ -1265,6 +1290,10 @@ def create_file_block( if index is not None: block["index"] = index + extras = {k: v for k, v in kwargs.items() if v is not None} + if extras: + block["extras"] = extras + return block @@ -1277,6 +1306,7 @@ def create_plaintext_block( context: Optional[str] = None, id: Optional[str] = None, index: Optional[int] = None, + **kwargs: Any, ) -> PlainTextContentBlock: """Create a ``PlainTextContentBlock``. @@ -1319,6 +1349,10 @@ def create_plaintext_block( if index is not None: block["index"] = index + extras = {k: v for k, v in kwargs.items() if v is not None} + if extras: + block["extras"] = extras + return block @@ -1328,6 +1362,7 @@ def create_tool_call( *, id: Optional[str] = None, index: Optional[int] = None, + **kwargs: Any, ) -> ToolCall: """Create a ``ToolCall``. @@ -1355,6 +1390,10 @@ def create_tool_call( if index is not None: block["index"] = index + extras = {k: v for k, v in kwargs.items() if v is not None} + if extras: + block["extras"] = extras + return block @@ -1362,6 +1401,7 @@ def create_reasoning_block( reasoning: Optional[str] = None, id: Optional[str] = None, index: Optional[int] = None, + **kwargs: Any, ) -> ReasoningContentBlock: """Create a ``ReasoningContentBlock``. @@ -1387,6 +1427,10 @@ def create_reasoning_block( if index is not None: block["index"] = index + extras = {k: v for k, v in kwargs.items() if v is not None} + if extras: + block["extras"] = extras + return block @@ -1398,6 +1442,7 @@ def create_citation( end_index: Optional[int] = None, cited_text: Optional[str] = None, id: Optional[str] = None, + **kwargs: Any, ) -> Citation: """Create a ``Citation``. @@ -1430,6 +1475,10 @@ def create_citation( if cited_text is not None: block["cited_text"] = cited_text + extras = {k: v for k, v in kwargs.items() if v is not None} + if extras: + block["extras"] = extras + return block From c9e847fcb8978894fb25eff2a1c63de1460401ad Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Fri, 15 Aug 2025 14:33:59 -0400 Subject: [PATCH 20/73] chore: format `output_version` docstring --- libs/core/langchain_core/language_models/chat_models.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py index 1eb32ea1d66af..572c805c0be0b 100644 --- a/libs/core/langchain_core/language_models/chat_models.py +++ b/libs/core/langchain_core/language_models/chat_models.py @@ -335,17 +335,18 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC): """ output_version: str = "v0" - """Version of AIMessage output format to use. + """Version of ``AIMessage`` output format to use. - This field is used to roll-out new output formats for chat model AIMessages + This field is used to roll-out new output formats for chat model ``AIMessage``s in a backwards-compatible way. ``'v1'`` standardizes output format using a list of typed ContentBlock dicts. We recommend this for new applications. - All chat models currently support the default of ``"v0"``. + All chat models currently support the default of ``'v0'``. .. 
versionadded:: 1.0 + """ @model_validator(mode="before") From 8d110599cb9562aaa9c13e1efd796b25258dc588 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Fri, 15 Aug 2025 14:39:13 -0400 Subject: [PATCH 21/73] chore: more content block docstring formatting --- libs/core/langchain_core/messages/ai.py | 2 +- libs/core/langchain_core/messages/base.py | 9 +++++---- libs/core/langchain_core/messages/human.py | 2 +- libs/core/langchain_core/messages/system.py | 2 +- libs/core/langchain_core/messages/tool.py | 2 +- 5 files changed, 9 insertions(+), 8 deletions(-) diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index 774fab4611316..207ab6c11331d 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -201,7 +201,7 @@ def __init__( content_blocks: Optional[list[types.ContentBlock]] = None, **kwargs: Any, ) -> None: - """Specify content as a positional arg or content_blocks for typing support.""" + """Specify ``content`` as positional arg or ``content_blocks`` for typing.""" if content_blocks is not None: # If there are tool calls in content_blocks, but not in tool_calls, add them content_tool_calls = [ diff --git a/libs/core/langchain_core/messages/base.py b/libs/core/langchain_core/messages/base.py index 13b12f764d19e..871f687d35cf6 100644 --- a/libs/core/langchain_core/messages/base.py +++ b/libs/core/langchain_core/messages/base.py @@ -83,7 +83,7 @@ def __init__( content_blocks: Optional[list[types.ContentBlock]] = None, **kwargs: Any, ) -> None: - """Specify content as a positional arg or content_blocks for typing support.""" + """Specify ``content`` as positional arg or ``content_blocks`` for typing.""" if content_blocks is not None: super().__init__(content=content_blocks, **kwargs) else: @@ -108,17 +108,18 @@ def get_lc_namespace(cls) -> list[str]: @property def content_blocks(self) -> list[types.ContentBlock]: - """Return the content as a list of standard ContentBlocks. + """Return the content as a list of standard ``ContentBlock``s. To use this property, the corresponding chat model must support - ``message_version="v1"`` or higher: + ``message_version='v1'`` or higher: .. code-block:: python from langchain.chat_models import init_chat_model llm = init_chat_model("...", message_version="v1") - otherwise, does best-effort parsing to standard types. + Otherwise, does best-effort parsing to standard types. 
+ """ blocks: list[types.ContentBlock] = [] content = ( diff --git a/libs/core/langchain_core/messages/human.py b/libs/core/langchain_core/messages/human.py index 041db0cdb9726..954f05f037ec2 100644 --- a/libs/core/langchain_core/messages/human.py +++ b/libs/core/langchain_core/messages/human.py @@ -63,7 +63,7 @@ def __init__( content_blocks: Optional[list[types.ContentBlock]] = None, **kwargs: Any, ) -> None: - """Specify content as a positional arg or content_blocks for typing support.""" + """Specify ``content`` as positional arg or ``content_blocks`` for typing.""" if content_blocks is not None: super().__init__( content=cast("Union[str, list[Union[str, dict]]]", content_blocks), diff --git a/libs/core/langchain_core/messages/system.py b/libs/core/langchain_core/messages/system.py index c8a5bbae5c859..a3f399b88c142 100644 --- a/libs/core/langchain_core/messages/system.py +++ b/libs/core/langchain_core/messages/system.py @@ -56,7 +56,7 @@ def __init__( content_blocks: Optional[list[types.ContentBlock]] = None, **kwargs: Any, ) -> None: - """Specify content as a positional arg or content_blocks for typing support.""" + """Specify ``content`` as positional arg or ``content_blocks`` for typing.""" if content_blocks is not None: super().__init__( content=cast("Union[str, list[Union[str, dict]]]", content_blocks), diff --git a/libs/core/langchain_core/messages/tool.py b/libs/core/langchain_core/messages/tool.py index 14177181480fc..efb714bb58506 100644 --- a/libs/core/langchain_core/messages/tool.py +++ b/libs/core/langchain_core/messages/tool.py @@ -157,7 +157,7 @@ def __init__( content_blocks: Optional[list[types.ContentBlock]] = None, **kwargs: Any, ) -> None: - """Specify content as a positional arg or content_blocks for typing support.""" + """Specify ``content`` as positional arg or ``content_blocks`` for typing.""" if content_blocks is not None: super().__init__( content=cast("Union[str, list[Union[str, dict]]]", content_blocks), From 3db8c6011236acf1b9076cc2ffa9aec2d947a6eb Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Fri, 15 Aug 2025 15:01:07 -0400 Subject: [PATCH 22/73] chore: more content block formatting --- .../langchain_core/messages/content_blocks.py | 235 +++++++++++------- 1 file changed, 141 insertions(+), 94 deletions(-) diff --git a/libs/core/langchain_core/messages/content_blocks.py b/libs/core/langchain_core/messages/content_blocks.py index 656d47cdaa6e7..1448d37086b42 100644 --- a/libs/core/langchain_core/messages/content_blocks.py +++ b/libs/core/langchain_core/messages/content_blocks.py @@ -4,17 +4,15 @@ This module is under active development. The API is unstable and subject to change in future releases. -This module provides a standardized data structure for representing inputs to and -outputs from LLMs. The core abstraction is the **Content Block**, a ``TypedDict`` that -can represent a piece of text, an image, a tool call, or other structured data. +This module provides standardized data structures for representing inputs to and +outputs from LLMs. The core abstraction is the **Content Block**, a ``TypedDict``. **Rationale** Different LLM providers use distinct and incompatible API schemas. This module -introduces a unified, provider-agnostic format to standardize these interactions. A -message to or from a model is simply a ``list`` of ``ContentBlock`` objects, allowing -for the natural interleaving of text, images, and other content in a single, ordered -sequence. +provides a unified, provider-agnostic format to facilitate these interactions. 
A +message to or from a model is simply a list of content blocks, allowing for the natural +interleaving of text, images, and other content in a single ordered sequence. An adapter for a specific provider is responsible for translating this standard list of blocks into the format required by its API. @@ -29,61 +27,70 @@ by default in the ``extras`` field of each block. This allows for additional metadata to be included without breaking the standard structure. -Following widespread adoption of `PEP 728 `__, we will add -``extra_items=Any`` as a param to Content Blocks. This will signify to type checkers -that additional provider-specific fields are allowed outside of the ``extras`` field, -and that will become the new standard approach to adding provider-specific metadata. - .. warning:: Do not heavily rely on the ``extras`` field for provider-specific data! This field is subject to deprecation in future releases as we move towards PEP 728. -**Example with PEP 728 provider-specific fields:** +.. note:: + Following widespread adoption of `PEP 728 `__, we + will add ``extra_items=Any`` as a param to Content Blocks. This will signify to type + checkers that additional provider-specific fields are allowed outside of the + ``extras`` field, and that will become the new standard approach to adding + provider-specific metadata. -.. code-block:: python + .. dropdown:: - # Content block definition - # NOTE: `extra_items=Any` - class TextContentBlock(TypedDict, extra_items=Any): - type: Literal["text"] - id: NotRequired[str] - text: str - annotations: NotRequired[list[Annotation]] - index: NotRequired[int] + **Example with PEP 728 provider-specific fields:** -.. code-block:: python + .. code-block:: python - from langchain_core.messages.content_blocks import TextContentBlock + # Content block definition + # NOTE: `extra_items=Any` + class TextContentBlock(TypedDict, extra_items=Any): + type: Literal["text"] + id: NotRequired[str] + text: str + annotations: NotRequired[list[Annotation]] + index: NotRequired[int] - # Create a text content block with provider-specific fields - my_block: TextContentBlock = { - # Add required fields - "type": "text", - "text": "Hello, world!", - # Additional fields not specified in the TypedDict - # These are valid with PEP 728 and are typed as Any - "openai_metadata": {"model": "gpt-4", "temperature": 0.7}, - "anthropic_usage": {"input_tokens": 10, "output_tokens": 20}, - "custom_field": "any value", - } + .. code-block:: python - # Mutating an existing block to add provider-specific fields - openai_data = my_block["openai_metadata"] # Type: Any + from langchain_core.messages.content_blocks import TextContentBlock + + # Create a text content block with provider-specific fields + my_block: TextContentBlock = { + # Add required fields + "type": "text", + "text": "Hello, world!", + # Additional fields not specified in the TypedDict + # These are valid with PEP 728 and are typed as Any + "openai_metadata": {"model": "gpt-4", "temperature": 0.7}, + "anthropic_usage": {"input_tokens": 10, "output_tokens": 20}, + "custom_field": "any value", + } -.. note:: - PEP 728 is enabled with ``# type: ignore[call-arg]`` comments to suppress warnings - from type checkers that don't yet support it. The functionality works correctly - in Python 3.13+ and will be fully supported as the ecosystem catches up. 
+ # Mutating an existing block to add provider-specific fields + openai_data = my_block["openai_metadata"] # Type: Any + + PEP 728 is enabled with ``# type: ignore[call-arg]`` comments to suppress + warnings from type checkers that don't yet support it. The functionality works + correctly in Python 3.13+ and will be fully supported as the ecosystem catches + up. **Key Block Types** The module defines several types of content blocks, including: -- ``TextContentBlock``: Standard text. -- ``ImageContentBlock``, ``Audio...``, ``Video...``, ``PlainText...``, ``File...``: For multimodal data. +- ``TextContentBlock``: Standard text output. +- ``Citation``: For annotations that link text output to a source document. - ``ToolCallContentBlock``: For function calling. - ``ReasoningContentBlock``: To capture a model's thought process. -- ``Citation``: For annotations that link generated text to a source document. +- Multimodal data: + - ``ImageContentBlock`` + - ``AudioContentBlock`` + - ``VideoContentBlock`` + - ``PlainTextContentBlock`` (e.g. .txt or .md files) + - ``FileContentBlock`` (e.g. PDFs, etc.) **Example Usage** @@ -92,28 +99,31 @@ class TextContentBlock(TypedDict, extra_items=Any): # Direct construction: from langchain_core.messages.content_blocks import TextContentBlock, ImageContentBlock - multimodal_message: AIMessage = [ - TextContentBlock(type="text", text="What is shown in this image?"), - ImageContentBlock( - type="image", - url="https://www.langchain.com/images/brand/langchain_logo_text_w_white.png", - mime_type="image/png", - ), - ] + multimodal_message: AIMessage(content_blocks= + [ + TextContentBlock(type="text", text="What is shown in this image?"), + ImageContentBlock( + type="image", + url="https://www.langchain.com/images/brand/langchain_logo_text_w_white.png", + mime_type="image/png", + ), + ] + ) + # Using factories: from langchain_core.messages.content_blocks import create_text_block, create_image_block - # Using factory functions: - multimodal_message: AIMessage = [ - create_text_block("What is shown in this image?"), - create_image_block( - url="https://www.langchain.com/images/brand/langchain_logo_text_w_white.png", - mime_type="image/png", - ), - ] - -Factory functions like ``create_text_block`` and ``create_image_block`` are provided -and offer benefits such as: + multimodal_message: AIMessage(content= + [ + create_text_block("What is shown in this image?"), + create_image_block( + url="https://www.langchain.com/images/brand/langchain_logo_text_w_white.png", + mime_type="image/png", + ), + ] + ) + +Factory functions offer benefits such as: - Automatic ID generation (when not provided) - No need to manually specify the ``type`` field @@ -136,7 +146,7 @@ def _ensure_id(id_val: Optional[str]) -> str: id_val: Optional string ID value to validate. Returns: - A valid string ID, either the provided value or a new UUID. + A string ID, either the validated provided value or a newly generated UUID4. """ return id_val or str(f"lc_{uuid4()}") @@ -166,18 +176,12 @@ class Citation(TypedDict): - Generated by the provider (e.g., OpenAI's file ID) - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ url: NotRequired[str] """URL of the document source.""" - # For future consideration, if needed: - # provenance: NotRequired[str] - # """Provenance of the document, e.g., ``'Wikipedia'``, ``'arXiv'``, etc. - - # Included for future compatibility; not currently implemented. - # """ - title: NotRequired[str] """Source document title. 
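For illustration only (not part of the patch), a minimal sketch of attaching a
``Citation`` annotation to a ``TextContentBlock`` using the factories touched earlier
in this series; the answer text, cited excerpt, and indices are invented.

.. code-block:: python

    from langchain_core.messages.content_blocks import (
        create_citation,
        create_text_block,
    )

    answer = "Solar capacity grew roughly 20% in 2024."

    citation = create_citation(
        start_index=0,
        end_index=len(answer),
        cited_text="Global installed solar capacity grew about 20% year over year.",
    )

    block = create_text_block(answer, annotations=[citation])
    assert block["annotations"][0]["cited_text"].startswith("Global installed")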
@@ -185,12 +189,10 @@ class Citation(TypedDict): """ start_index: NotRequired[int] - """Start index of the **response text** (``TextContentBlock.text``) for which the - annotation applies.""" + """Start index of the **response text** (``TextContentBlock.text``).""" end_index: NotRequired[int] - """End index of the **response text** (``TextContentBlock.text``) for which the - annotation applies.""" + """End index of the **response text** (``TextContentBlock.text``)""" cited_text: NotRequired[str] """Excerpt of source text being cited.""" @@ -212,10 +214,12 @@ class NonStandardAnnotation(TypedDict): """Type of the content block. Used for discrimination.""" id: NotRequired[str] - """Content block identifier. Either: + """Content block identifier. + Either: - Generated by the provider (e.g., OpenAI's file ID) - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ value: dict[str, Any] @@ -244,10 +248,12 @@ class TextContentBlock(TypedDict): """Type of the content block. Used for discrimination.""" id: NotRequired[str] - """Content block identifier. Either: + """Content block identifier. + Either: - Generated by the provider (e.g., OpenAI's file ID) - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ text: str @@ -296,6 +302,7 @@ class ToolCall(TypedDict): An identifier is needed to associate a tool call request with a tool call result in events when multiple concurrent tool calls are made. + """ # TODO: Consider making this NotRequired[str] in the future. @@ -330,6 +337,7 @@ class ToolCallChunk(TypedDict): AIMessageChunk(content="", tool_call_chunks=left_chunks) + AIMessageChunk(content="", tool_call_chunks=right_chunks) ).tool_call_chunks == [ToolCallChunk(name='foo', args='{"a":1}', index=0)] + """ # TODO: Consider making fields NotRequired[str] in the future. @@ -338,7 +346,12 @@ class ToolCallChunk(TypedDict): """Used for serialization.""" id: Optional[str] - """An identifier associated with the tool call.""" + """An identifier associated with the tool call. + + An identifier is needed to associate a tool call request with a tool + call result in events when multiple concurrent tool calls are made. + + """ name: Optional[str] """The name of the tool to be called.""" @@ -358,6 +371,7 @@ class InvalidToolCall(TypedDict): Here we add an ``error`` key to surface errors made during generation (e.g., invalid JSON arguments.) + """ # TODO: Consider making fields NotRequired[str] in the future. @@ -366,7 +380,12 @@ class InvalidToolCall(TypedDict): """Used for discrimination.""" id: Optional[str] - """An identifier associated with the tool call.""" + """An identifier associated with the tool call. + + An identifier is needed to associate a tool call request with a tool + call result in events when multiple concurrent tool calls are made. + + """ name: Optional[str] """The name of the tool to be called.""" @@ -384,8 +403,6 @@ class InvalidToolCall(TypedDict): """Provider-specific metadata.""" -# Note: These are not standard tool calls, but rather provider-specific built-in tools. -# Web search class WebSearchCall(TypedDict): """Built-in web search tool call.""" @@ -393,10 +410,12 @@ class WebSearchCall(TypedDict): """Type of the content block. Used for discrimination.""" id: NotRequired[str] - """Content block identifier. Either: + """Content block identifier. 
+ Either: - Generated by the provider (e.g., OpenAI's file ID) - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ query: NotRequired[str] @@ -416,10 +435,12 @@ class WebSearchResult(TypedDict): """Type of the content block. Used for discrimination.""" id: NotRequired[str] - """Content block identifier. Either: + """Content block identifier. + Either: - Generated by the provider (e.g., OpenAI's file ID) - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ urls: NotRequired[list[str]] @@ -439,10 +460,12 @@ class CodeInterpreterCall(TypedDict): """Type of the content block. Used for discrimination.""" id: NotRequired[str] - """Content block identifier. Either: + """Content block identifier. + Either: - Generated by the provider (e.g., OpenAI's file ID) - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ language: NotRequired[str] @@ -463,22 +486,26 @@ class CodeInterpreterOutput(TypedDict): Full output of a code interpreter tool call is represented by ``CodeInterpreterResult`` which is a list of these blocks. + """ type: Literal["code_interpreter_output"] """Type of the content block. Used for discrimination.""" id: NotRequired[str] - """Content block identifier. Either: + """Content block identifier. + Either: - Generated by the provider (e.g., OpenAI's file ID) - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ return_code: NotRequired[int] """Return code of the executed code. Example: ``0`` for success, non-zero for failure. + """ stderr: NotRequired[str] @@ -504,10 +531,12 @@ class CodeInterpreterResult(TypedDict): """Type of the content block. Used for discrimination.""" id: NotRequired[str] - """Content block identifier. Either: + """Content block identifier. + Either: - Generated by the provider (e.g., OpenAI's file ID) - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ output: list[CodeInterpreterOutput] @@ -536,10 +565,12 @@ class ReasoningContentBlock(TypedDict): """Type of the content block. Used for discrimination.""" id: NotRequired[str] - """Content block identifier. Either: + """Content block identifier. + Either: - Generated by the provider (e.g., OpenAI's file ID) - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ reasoning: NotRequired[str] @@ -547,6 +578,7 @@ class ReasoningContentBlock(TypedDict): Either the thought summary or the raw reasoning text itself. This is often parsed from ```` tags in the model's response. + """ index: NotRequired[int] @@ -575,10 +607,12 @@ class ImageContentBlock(TypedDict): """Type of the content block. Used for discrimination.""" id: NotRequired[str] - """Content block identifier. Either: + """Content block identifier. + Either: - Generated by the provider (e.g., OpenAI's file ID) - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ file_id: NotRequired[str] @@ -588,6 +622,7 @@ class ImageContentBlock(TypedDict): """MIME type of the image. Required for base64. `Examples from IANA `__ + """ index: NotRequired[int] @@ -619,10 +654,12 @@ class VideoContentBlock(TypedDict): """Type of the content block. Used for discrimination.""" id: NotRequired[str] - """Content block identifier. Either: + """Content block identifier. 
+ Either: - Generated by the provider (e.g., OpenAI's file ID) - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ file_id: NotRequired[str] @@ -632,6 +669,7 @@ class VideoContentBlock(TypedDict): """MIME type of the video. Required for base64. `Examples from IANA `__ + """ index: NotRequired[int] @@ -662,10 +700,12 @@ class AudioContentBlock(TypedDict): """Type of the content block. Used for discrimination.""" id: NotRequired[str] - """Content block identifier. Either: + """Content block identifier. + Either: - Generated by the provider (e.g., OpenAI's file ID) - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ file_id: NotRequired[str] @@ -711,10 +751,12 @@ class PlainTextContentBlock(TypedDict): """Type of the content block. Used for discrimination.""" id: NotRequired[str] - """Content block identifier. Either: + """Content block identifier. + Either: - Generated by the provider (e.g., OpenAI's file ID) - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ file_id: NotRequired[str] @@ -768,10 +810,12 @@ class FileContentBlock(TypedDict): """Type of the content block. Used for discrimination.""" id: NotRequired[str] - """Content block identifier. Either: + """Content block identifier. + Either: - Generated by the provider (e.g., OpenAI's file ID) - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ file_id: NotRequired[str] @@ -828,10 +872,12 @@ class NonStandardContentBlock(TypedDict): """Type of the content block. Used for discrimination.""" id: NotRequired[str] - """Content block identifier. Either: + """Content block identifier. + Either: - Generated by the provider (e.g., OpenAI's file ID) - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)) + """ value: dict[str, Any] @@ -900,6 +946,7 @@ def is_data_content_block(block: dict) -> bool: Returns: True if the content block is a data content block, False otherwise. + """ return block.get("type") in ( "audio", @@ -947,7 +994,7 @@ def is_invalid_tool_call_block( def convert_to_openai_image_block(block: dict[str, Any]) -> dict: - """Convert image content block to format expected by OpenAI Chat Completions API.""" + """Convert ``ImageContentBlock`` to format expected by OpenAI Chat Completions.""" if "url" in block: return { "type": "image_url", From 301a425151cfc2dc7054d2f5247453f3e0e3ee66 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Fri, 15 Aug 2025 15:16:07 -0400 Subject: [PATCH 23/73] snapshot --- .../__snapshots__/test_runnable.ambr | 248 ++++++++++++++++-- 1 file changed, 224 insertions(+), 24 deletions(-) diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr index 079e490906158..db17757a8c95f 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr @@ -2674,7 +2674,7 @@ 'description': ''' Allowance for errors made by LLM. - Here we add an `error` key to surface errors made during generation + Here we add an ``error`` key to surface errors made during generation (e.g., invalid JSON arguments.) 
''', 'properties': dict({ @@ -2700,6 +2700,10 @@ ]), 'title': 'Error', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -2711,6 +2715,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'anyOf': list([ dict({ @@ -2728,9 +2736,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', 'error', ]), 'title': 'InvalidToolCall', @@ -2943,12 +2952,23 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". + + .. note:: + ``create_tool_call`` may also be used as a factory to create a + ``ToolCall``. Benefits include: + + * Automatic ID generation (when not provided) + * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -2960,6 +2980,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -2970,9 +2994,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', ]), 'title': 'ToolCall', 'type': 'object', @@ -4150,7 +4175,7 @@ 'description': ''' Allowance for errors made by LLM. - Here we add an `error` key to surface errors made during generation + Here we add an ``error`` key to surface errors made during generation (e.g., invalid JSON arguments.) ''', 'properties': dict({ @@ -4176,6 +4201,10 @@ ]), 'title': 'Error', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -4187,6 +4216,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'anyOf': list([ dict({ @@ -4204,9 +4237,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', 'error', ]), 'title': 'InvalidToolCall', @@ -4438,12 +4472,23 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". + + .. note:: + ``create_tool_call`` may also be used as a factory to create a + ``ToolCall``. Benefits include: + + * Automatic ID generation (when not provided) + * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -4455,6 +4500,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -4465,9 +4514,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', ]), 'title': 'ToolCall', 'type': 'object', @@ -5657,7 +5707,7 @@ 'description': ''' Allowance for errors made by LLM. - Here we add an `error` key to surface errors made during generation + Here we add an ``error`` key to surface errors made during generation (e.g., invalid JSON arguments.) 
''', 'properties': dict({ @@ -5683,6 +5733,10 @@ ]), 'title': 'Error', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -5694,6 +5748,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'anyOf': list([ dict({ @@ -5711,9 +5769,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', 'error', ]), 'title': 'InvalidToolCall', @@ -5945,12 +6004,23 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". + + .. note:: + ``create_tool_call`` may also be used as a factory to create a + ``ToolCall``. Benefits include: + + * Automatic ID generation (when not provided) + * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -5962,6 +6032,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -5972,9 +6046,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', ]), 'title': 'ToolCall', 'type': 'object', @@ -7039,7 +7114,7 @@ 'description': ''' Allowance for errors made by LLM. - Here we add an `error` key to surface errors made during generation + Here we add an ``error`` key to surface errors made during generation (e.g., invalid JSON arguments.) ''', 'properties': dict({ @@ -7065,6 +7140,10 @@ ]), 'title': 'Error', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -7076,6 +7155,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'anyOf': list([ dict({ @@ -7093,9 +7176,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', 'error', ]), 'title': 'InvalidToolCall', @@ -7308,12 +7392,23 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". + + .. note:: + ``create_tool_call`` may also be used as a factory to create a + ``ToolCall``. Benefits include: + + * Automatic ID generation (when not provided) + * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -7325,6 +7420,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -7335,9 +7434,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', ]), 'title': 'ToolCall', 'type': 'object', @@ -8557,7 +8657,7 @@ 'description': ''' Allowance for errors made by LLM. - Here we add an `error` key to surface errors made during generation + Here we add an ``error`` key to surface errors made during generation (e.g., invalid JSON arguments.) 
''', 'properties': dict({ @@ -8583,6 +8683,10 @@ ]), 'title': 'Error', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -8594,6 +8698,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'anyOf': list([ dict({ @@ -8611,9 +8719,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', 'error', ]), 'title': 'InvalidToolCall', @@ -8845,12 +8954,23 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". + + .. note:: + ``create_tool_call`` may also be used as a factory to create a + ``ToolCall``. Benefits include: + + * Automatic ID generation (when not provided) + * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -8862,6 +8982,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -8872,9 +8996,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', ]), 'title': 'ToolCall', 'type': 'object', @@ -9984,7 +10109,7 @@ 'description': ''' Allowance for errors made by LLM. - Here we add an `error` key to surface errors made during generation + Here we add an ``error`` key to surface errors made during generation (e.g., invalid JSON arguments.) ''', 'properties': dict({ @@ -10010,6 +10135,10 @@ ]), 'title': 'Error', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -10021,6 +10150,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'anyOf': list([ dict({ @@ -10038,9 +10171,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', 'error', ]), 'title': 'InvalidToolCall', @@ -10253,12 +10387,23 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". + + .. note:: + ``create_tool_call`` may also be used as a factory to create a + ``ToolCall``. Benefits include: + + * Automatic ID generation (when not provided) + * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -10270,6 +10415,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -10280,9 +10429,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', ]), 'title': 'ToolCall', 'type': 'object', @@ -11410,7 +11560,7 @@ 'description': ''' Allowance for errors made by LLM. - Here we add an `error` key to surface errors made during generation + Here we add an ``error`` key to surface errors made during generation (e.g., invalid JSON arguments.) 
''', 'properties': dict({ @@ -11436,6 +11586,10 @@ ]), 'title': 'Error', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -11447,6 +11601,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'anyOf': list([ dict({ @@ -11464,9 +11622,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', 'error', ]), 'title': 'InvalidToolCall', @@ -11709,12 +11868,23 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". + + .. note:: + ``create_tool_call`` may also be used as a factory to create a + ``ToolCall``. Benefits include: + + * Automatic ID generation (when not provided) + * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -11726,6 +11896,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -11736,9 +11910,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', ]), 'title': 'ToolCall', 'type': 'object', @@ -12878,7 +13053,7 @@ 'description': ''' Allowance for errors made by LLM. - Here we add an `error` key to surface errors made during generation + Here we add an ``error`` key to surface errors made during generation (e.g., invalid JSON arguments.) ''', 'properties': dict({ @@ -12904,6 +13079,10 @@ ]), 'title': 'Error', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -12915,6 +13094,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'anyOf': list([ dict({ @@ -12932,9 +13115,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', 'error', ]), 'title': 'InvalidToolCall', @@ -13166,12 +13350,23 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". + + .. note:: + ``create_tool_call`` may also be used as a factory to create a + ``ToolCall``. 
Benefits include: + + * Automatic ID generation (when not provided) + * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), + 'extras': dict({ + 'title': 'Extras', + 'type': 'object', + }), 'id': dict({ 'anyOf': list([ dict({ @@ -13183,6 +13378,10 @@ ]), 'title': 'Id', }), + 'index': dict({ + 'title': 'Index', + 'type': 'integer', + }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -13193,9 +13392,10 @@ }), }), 'required': list([ + 'type', + 'id', 'name', 'args', - 'id', ]), 'title': 'ToolCall', 'type': 'object', From a3b20b0ef575a1a6afa8b592c6f219a2fdf7dac3 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Fri, 15 Aug 2025 15:28:11 -0400 Subject: [PATCH 24/73] clean up id test --- libs/core/tests/unit_tests/test_messages.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/libs/core/tests/unit_tests/test_messages.py b/libs/core/tests/unit_tests/test_messages.py index 3b591945954b6..c3304b5917361 100644 --- a/libs/core/tests/unit_tests/test_messages.py +++ b/libs/core/tests/unit_tests/test_messages.py @@ -180,22 +180,21 @@ def test_message_chunks() -> None: assert AIMessageChunk(content="") + left == left assert right + AIMessageChunk(content="") == right + default_id = "lc_run--abc123" + meaningful_id = "msg_def456" + # Test ID order of precedence - null_id = AIMessageChunk(content="", id=None) - default_id = AIMessageChunk( - content="", id="lc_run--abc123" + null_id_chunk = AIMessageChunk(content="", id=None) + default_id_chunk = AIMessageChunk( + content="", id=default_id ) # LangChain-assigned run ID - meaningful_id = AIMessageChunk(content="", id="msg_def456") # provider-assigned ID - - assert (null_id + default_id).id == "lc_run--abc123" - assert (default_id + null_id).id == "lc_run--abc123" + meaningful_id = AIMessageChunk(content="", id=meaningful_id) # provider-assigned ID - assert (null_id + meaningful_id).id == "msg_def456" - assert (meaningful_id + null_id).id == "msg_def456" + assert (null_id_chunk + default_id_chunk).id == default_id + assert (null_id_chunk + meaningful_id).id == meaningful_id # Provider assigned IDs have highest precedence - assert (default_id + meaningful_id).id == "msg_def456" - assert (meaningful_id + default_id).id == "msg_def456" + assert (default_id_chunk + meaningful_id).id == meaningful_id def test_chat_message_chunks() -> None: From 8fc1973bbf657d744817e93a1a1f6aba455c38f6 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Fri, 15 Aug 2025 15:30:51 -0400 Subject: [PATCH 25/73] test: add note about for tuple conversion in ToolMessage --- libs/core/tests/unit_tests/test_messages.py | 1 + 1 file changed, 1 insertion(+) diff --git a/libs/core/tests/unit_tests/test_messages.py b/libs/core/tests/unit_tests/test_messages.py index c3304b5917361..281e964a1aed0 100644 --- a/libs/core/tests/unit_tests/test_messages.py +++ b/libs/core/tests/unit_tests/test_messages.py @@ -1040,6 +1040,7 @@ def test_tool_message_content() -> None: ToolMessage(["foo"], tool_call_id="1") ToolMessage([{"foo": "bar"}], tool_call_id="1") + # Ignoring since we're testing that tuples get converted to lists in `coerce_args` assert ToolMessage(("a", "b", "c"), tool_call_id="1").content == ["a", "b", "c"] # type: ignore[call-overload] assert ToolMessage(5, tool_call_id="1").content == "5" # type: ignore[call-overload] assert ToolMessage(5.1, tool_call_id="1").content == "5.1" # type: ignore[call-overload] From 86252d2ae6a860c96e8dd61505601d8d2128d63b Mon 
Sep 17 00:00:00 2001 From: Mason Daugherty Date: Fri, 15 Aug 2025 15:39:36 -0400 Subject: [PATCH 26/73] refactor: move ID prefixes --- libs/core/langchain_core/messages/ai.py | 18 +++--------------- .../langchain_core/messages/content_blocks.py | 14 +++++++++++++- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index 207ab6c11331d..76bc8d980a1d1 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -26,18 +26,6 @@ logger = logging.getLogger(__name__) -_LC_AUTO_PREFIX = "lc_" -"""LangChain auto-generated ID prefix for messages and content blocks.""" - -_LC_ID_PREFIX = f"{_LC_AUTO_PREFIX}run-" -"""Internal tracing/callback system identifier. - -Used for: -- Tracing. Every LangChain operation (LLM call, chain execution, tool use, etc.) - gets a unique run_id (UUID) -- Enables tracking parent-child relationships between operations -""" - class InputTokenDetails(TypedDict, total=False): """Breakdown of input token counts. @@ -523,15 +511,15 @@ def add_ai_message_chunks( for id_ in candidates: if ( id_ - and not id_.startswith(_LC_ID_PREFIX) - and not id_.startswith(_LC_AUTO_PREFIX) + and not id_.startswith(types.LC_ID_PREFIX) + and not id_.startswith(types.LC_AUTO_PREFIX) ): chunk_id = id_ break else: # second pass: prefer lc_run-* ids over lc_* ids for id_ in candidates: - if id_ and id_.startswith(_LC_ID_PREFIX): + if id_ and id_.startswith(types.LC_ID_PREFIX): chunk_id = id_ break else: diff --git a/libs/core/langchain_core/messages/content_blocks.py b/libs/core/langchain_core/messages/content_blocks.py index 1448d37086b42..32673f1d3cce2 100644 --- a/libs/core/langchain_core/messages/content_blocks.py +++ b/libs/core/langchain_core/messages/content_blocks.py @@ -135,6 +135,18 @@ class TextContentBlock(TypedDict, extra_items=Any): from typing_extensions import NotRequired, TypedDict, TypeGuard +LC_AUTO_PREFIX = "lc_" +"""LangChain auto-generated ID prefix for messages and content blocks.""" + +LC_ID_PREFIX = f"{LC_AUTO_PREFIX}run-" +"""Internal tracing/callback system identifier. + +Used for: +- Tracing. Every LangChain operation (LLM call, chain execution, tool use, etc.) + gets a unique run_id (UUID) +- Enables tracking parent-child relationships between operations +""" + def _ensure_id(id_val: Optional[str]) -> str: """Ensure the ID is a valid string, generating a new UUID if not provided. @@ -148,7 +160,7 @@ def _ensure_id(id_val: Optional[str]) -> str: Returns: A string ID, either the validated provided value or a newly generated UUID4. 
""" - return id_val or str(f"lc_{uuid4()}") + return id_val or str(f"{LC_AUTO_PREFIX}{uuid4()}") class Citation(TypedDict): From f691dc348f7c2caf2a84cdf65fc684bdc2dafc26 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Fri, 15 Aug 2025 15:42:17 -0400 Subject: [PATCH 27/73] refactor: make `ensure_id` public --- libs/core/langchain_core/messages/__init__.py | 7 ++++++ .../langchain_core/messages/content_blocks.py | 22 +++++++++---------- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/libs/core/langchain_core/messages/__init__.py b/libs/core/langchain_core/messages/__init__.py index 410299ea5b553..a0482bee86231 100644 --- a/libs/core/langchain_core/messages/__init__.py +++ b/libs/core/langchain_core/messages/__init__.py @@ -33,6 +33,8 @@ ) from langchain_core.messages.chat import ChatMessage, ChatMessageChunk from langchain_core.messages.content_blocks import ( + LC_AUTO_PREFIX, + LC_ID_PREFIX, Annotation, AudioContentBlock, Citation, @@ -53,6 +55,7 @@ WebSearchResult, convert_to_openai_data_block, convert_to_openai_image_block, + ensure_id, is_data_content_block, is_reasoning_block, is_text_block, @@ -85,6 +88,8 @@ ) __all__ = ( + "LC_AUTO_PREFIX", + "LC_ID_PREFIX", "AIMessage", "AIMessageChunk", "Annotation", @@ -128,6 +133,7 @@ "convert_to_openai_data_block", "convert_to_openai_image_block", "convert_to_openai_messages", + "ensure_id", "filter_messages", "get_buffer_string", "is_data_content_block", @@ -145,6 +151,7 @@ ) _dynamic_imports = { + "ensure_id": "content_blocks", "AIMessage": "ai", "AIMessageChunk": "ai", "Annotation": "content_blocks", diff --git a/libs/core/langchain_core/messages/content_blocks.py b/libs/core/langchain_core/messages/content_blocks.py index 32673f1d3cce2..ac00966814a4f 100644 --- a/libs/core/langchain_core/messages/content_blocks.py +++ b/libs/core/langchain_core/messages/content_blocks.py @@ -148,7 +148,7 @@ class TextContentBlock(TypedDict, extra_items=Any): """ -def _ensure_id(id_val: Optional[str]) -> str: +def ensure_id(id_val: Optional[str]) -> str: """Ensure the ID is a valid string, generating a new UUID if not provided. 
Auto-generated UUIDs are prefixed by ``'lc_'`` to indicate they are @@ -1106,7 +1106,7 @@ def create_text_block( block = TextContentBlock( type="text", text=text, - id=_ensure_id(id), + id=ensure_id(id), ) if annotations is not None: block["annotations"] = annotations @@ -1156,7 +1156,7 @@ def create_image_block( msg = "Must provide one of: url, base64, or file_id" raise ValueError(msg) - block = ImageContentBlock(type="image", id=_ensure_id(id)) + block = ImageContentBlock(type="image", id=ensure_id(id)) if url is not None: block["url"] = url @@ -1216,7 +1216,7 @@ def create_video_block( msg = "mime_type is required when using base64 data" raise ValueError(msg) - block = VideoContentBlock(type="video", id=_ensure_id(id)) + block = VideoContentBlock(type="video", id=ensure_id(id)) if url is not None: block["url"] = url @@ -1276,7 +1276,7 @@ def create_audio_block( msg = "mime_type is required when using base64 data" raise ValueError(msg) - block = AudioContentBlock(type="audio", id=_ensure_id(id)) + block = AudioContentBlock(type="audio", id=ensure_id(id)) if url is not None: block["url"] = url @@ -1336,7 +1336,7 @@ def create_file_block( msg = "mime_type is required when using base64 data" raise ValueError(msg) - block = FileContentBlock(type="file", id=_ensure_id(id)) + block = FileContentBlock(type="file", id=ensure_id(id)) if url is not None: block["url"] = url @@ -1390,7 +1390,7 @@ def create_plaintext_block( block = PlainTextContentBlock( type="text-plain", mime_type="text/plain", - id=_ensure_id(id), + id=ensure_id(id), ) if text is not None: @@ -1443,7 +1443,7 @@ def create_tool_call( type="tool_call", name=name, args=args, - id=_ensure_id(id), + id=ensure_id(id), ) if index is not None: @@ -1480,7 +1480,7 @@ def create_reasoning_block( block = ReasoningContentBlock( type="reasoning", reasoning=reasoning or "", - id=_ensure_id(id), + id=ensure_id(id), ) if index is not None: @@ -1521,7 +1521,7 @@ def create_citation( prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID. 
""" - block = Citation(type="citation", id=_ensure_id(id)) + block = Citation(type="citation", id=ensure_id(id)) if url is not None: block["url"] = url @@ -1565,7 +1565,7 @@ def create_non_standard_block( block = NonStandardContentBlock( type="non_standard", value=value, - id=_ensure_id(id), + id=ensure_id(id), ) if index is not None: From 7a8c6398a422c86665cc86c644c92d3ed1a8f4a1 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Fri, 15 Aug 2025 16:01:29 -0400 Subject: [PATCH 28/73] clarify: meaning of provider --- libs/core/tests/unit_tests/test_messages.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/libs/core/tests/unit_tests/test_messages.py b/libs/core/tests/unit_tests/test_messages.py index 281e964a1aed0..ae837214b70fb 100644 --- a/libs/core/tests/unit_tests/test_messages.py +++ b/libs/core/tests/unit_tests/test_messages.py @@ -188,13 +188,15 @@ def test_message_chunks() -> None: default_id_chunk = AIMessageChunk( content="", id=default_id ) # LangChain-assigned run ID - meaningful_id = AIMessageChunk(content="", id=meaningful_id) # provider-assigned ID + provider_chunk = AIMessageChunk( + content="", id=meaningful_id + ) # provided ID (either by user or provider) assert (null_id_chunk + default_id_chunk).id == default_id - assert (null_id_chunk + meaningful_id).id == meaningful_id + assert (null_id_chunk + provider_chunk).id == meaningful_id # Provider assigned IDs have highest precedence - assert (default_id_chunk + meaningful_id).id == meaningful_id + assert (default_id_chunk + provider_chunk).id == meaningful_id def test_chat_message_chunks() -> None: From 987031f86c306daeb85651aeefaec78aaae95f31 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Fri, 15 Aug 2025 16:27:08 -0400 Subject: [PATCH 29/73] fix: `_LC_ID_PREFIX` back --- libs/core/langchain_core/messages/__init__.py | 2 ++ libs/core/langchain_core/messages/ai.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/libs/core/langchain_core/messages/__init__.py b/libs/core/langchain_core/messages/__init__.py index a0482bee86231..3787da41e8f70 100644 --- a/libs/core/langchain_core/messages/__init__.py +++ b/libs/core/langchain_core/messages/__init__.py @@ -21,6 +21,7 @@ if TYPE_CHECKING: from langchain_core.messages.ai import ( + _LC_ID_PREFIX, AIMessage, AIMessageChunk, ) @@ -90,6 +91,7 @@ __all__ = ( "LC_AUTO_PREFIX", "LC_ID_PREFIX", + "_LC_ID_PREFIX", "AIMessage", "AIMessageChunk", "Annotation", diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index 76bc8d980a1d1..e2e045c71071a 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -26,6 +26,8 @@ logger = logging.getLogger(__name__) +_LC_ID_PREFIX = types.LC_ID_PREFIX + class InputTokenDetails(TypedDict, total=False): """Breakdown of input token counts. 
From 08cd5bb9b4b45b235ac9eb0166fed479880bb806 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Fri, 15 Aug 2025 16:27:47 -0400 Subject: [PATCH 30/73] clarify intent of `extras` under data blocks --- libs/core/langchain_core/messages/content_blocks.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/libs/core/langchain_core/messages/content_blocks.py b/libs/core/langchain_core/messages/content_blocks.py index ac00966814a4f..d98e0ff09ce4d 100644 --- a/libs/core/langchain_core/messages/content_blocks.py +++ b/libs/core/langchain_core/messages/content_blocks.py @@ -647,7 +647,7 @@ class ImageContentBlock(TypedDict): """Data as a base64 string.""" extras: NotRequired[dict[str, Any]] - """Provider-specific metadata.""" + """Provider-specific metadata. This shouldn't be used for the image data itself.""" class VideoContentBlock(TypedDict): @@ -694,7 +694,7 @@ class VideoContentBlock(TypedDict): """Data as a base64 string.""" extras: NotRequired[dict[str, Any]] - """Provider-specific metadata.""" + """Provider-specific metadata. This shouldn't be used for the video data itself.""" class AudioContentBlock(TypedDict): @@ -740,7 +740,7 @@ class AudioContentBlock(TypedDict): """Data as a base64 string.""" extras: NotRequired[dict[str, Any]] - """Provider-specific metadata.""" + """Provider-specific metadata. This shouldn't be used for the audio data itself.""" class PlainTextContentBlock(TypedDict): @@ -796,7 +796,7 @@ class PlainTextContentBlock(TypedDict): """Context for the text, e.g., a description or summary of the text's content.""" extras: NotRequired[dict[str, Any]] - """Provider-specific metadata.""" + """Provider-specific metadata. This shouldn't be used for the data itself.""" class FileContentBlock(TypedDict): @@ -850,7 +850,7 @@ class FileContentBlock(TypedDict): """Data as a base64 string.""" extras: NotRequired[dict[str, Any]] - """Provider-specific metadata.""" + """Provider-specific metadata. 
This shouldn't be used for the file data itself.""" # Future modalities to consider: From 7f9727ee088551937951b48e2b541389d5931f3c Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Fri, 15 Aug 2025 16:28:33 -0400 Subject: [PATCH 31/73] refactor: `is_data_content_block` --- .../langchain_core/messages/content_blocks.py | 35 +++++++++++++------ 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/libs/core/langchain_core/messages/content_blocks.py b/libs/core/langchain_core/messages/content_blocks.py index d98e0ff09ce4d..9a3313488f5bc 100644 --- a/libs/core/langchain_core/messages/content_blocks.py +++ b/libs/core/langchain_core/messages/content_blocks.py @@ -130,7 +130,7 @@ class TextContentBlock(TypedDict, extra_items=Any): """ # noqa: E501 import warnings -from typing import Any, Literal, Optional, Union +from typing import Any, Literal, Optional, Union, get_args, get_type_hints from uuid import uuid4 from typing_extensions import NotRequired, TypedDict, TypeGuard @@ -950,8 +950,24 @@ class NonStandardContentBlock(TypedDict): } +def _get_data_content_block_types() -> tuple[str, ...]: + """Get type literals from DataContentBlock union members dynamically.""" + data_block_types = [] + + for block_type in get_args(DataContentBlock): + hints = get_type_hints(block_type) + if "type" in hints: + type_annotation = hints["type"] + if hasattr(type_annotation, "__args__"): + # This is a Literal type, get the literal value + literal_value = type_annotation.__args__[0] + data_block_types.append(literal_value) + + return tuple(data_block_types) + + def is_data_content_block(block: dict) -> bool: - """Check if the content block is a standard data content block. + """Check if the provided content block is a standard v1 data content block. Args: block: The content block to check. @@ -960,20 +976,19 @@ def is_data_content_block(block: dict) -> bool: True if the content block is a data content block, False otherwise. """ - return block.get("type") in ( - "audio", - "image", - "video", - "file", - "text-plain", - ) and any( + return block.get("type") in _get_data_content_block_types() and any( + # Check if at least one non-type key is present to signify presence of data key in block for key in ( "url", "base64", "file_id", "text", - "source_type", # backwards compatibility + "source_type", # for backwards compatibility with v0 content blocks + # TODO: should we verify that if source_type is present, at least one of + # url, base64, or file_id is also present? Otherwise, source_type could be + # present without any actual data? Need to confirm whether this was ever + # possible in v0 content blocks in the first place. 
) ) From 00345c4de93a37cbd0297f8ddb3b4916cd1a1b37 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Fri, 15 Aug 2025 16:28:46 -0400 Subject: [PATCH 32/73] tests: add more data content block tests --- libs/core/tests/unit_tests/test_messages.py | 76 ++++++++++++++------- 1 file changed, 53 insertions(+), 23 deletions(-) diff --git a/libs/core/tests/unit_tests/test_messages.py b/libs/core/tests/unit_tests/test_messages.py index ae837214b70fb..6fc42db829b36 100644 --- a/libs/core/tests/unit_tests/test_messages.py +++ b/libs/core/tests/unit_tests/test_messages.py @@ -1118,19 +1118,41 @@ def test_message_text() -> None: def test_is_data_content_block() -> None: + # Test all DataContentBlock types with various data fields + + # Image blocks + assert is_data_content_block({"type": "image", "url": "https://..."}) assert is_data_content_block( - { - "type": "image", - "url": "https://...", - } + {"type": "image", "base64": "", "mime_type": "image/jpeg"} ) + + # Video blocks + assert is_data_content_block({"type": "video", "url": "https://video.mp4"}) assert is_data_content_block( - { - "type": "image", - "base64": "", - "mime_type": "image/jpeg", - } + {"type": "video", "base64": "", "mime_type": "video/mp4"} ) + assert is_data_content_block({"type": "video", "file_id": "vid_123"}) + + # Audio blocks + assert is_data_content_block({"type": "audio", "url": "https://audio.mp3"}) + assert is_data_content_block( + {"type": "audio", "base64": "", "mime_type": "audio/mp3"} + ) + assert is_data_content_block({"type": "audio", "file_id": "aud_123"}) + + # Plain text blocks + assert is_data_content_block({"type": "text-plain", "text": "document content"}) + assert is_data_content_block({"type": "text-plain", "url": "https://doc.txt"}) + assert is_data_content_block({"type": "text-plain", "file_id": "txt_123"}) + + # File blocks + assert is_data_content_block({"type": "file", "url": "https://file.pdf"}) + assert is_data_content_block( + {"type": "file", "base64": "", "mime_type": "application/pdf"} + ) + assert is_data_content_block({"type": "file", "file_id": "file_123"}) + + # Blocks with additional metadata (should still be valid) assert is_data_content_block( { "type": "image", @@ -1150,27 +1172,35 @@ def test_is_data_content_block() -> None: assert is_data_content_block( { "type": "image", - "source_type": "base64", # backward compatibility - } - ) - assert not is_data_content_block( - { - "type": "text", - "text": "foo", + "base64": "", + "mime_type": "image/jpeg", + "extras": "hi", } ) + + # Invalid cases - wrong type + assert not is_data_content_block({"type": "text", "text": "foo"}) assert not is_data_content_block( { "type": "image_url", "image_url": {"url": "https://..."}, - } - ) - assert not is_data_content_block( - { - "type": "image", - "source": "", - } + } # This is OpenAI Chat Completions ) + assert not is_data_content_block({"type": "tool_call", "name": "func", "args": {}}) + assert not is_data_content_block({"type": "invalid", "url": "something"}) + + # Invalid cases - valid type but no data or `source_type` fields + assert not is_data_content_block({"type": "image"}) + assert not is_data_content_block({"type": "video", "mime_type": "video/mp4"}) + assert not is_data_content_block({"type": "audio", "extras": {"key": "value"}}) + + # Invalid cases - valid type but wrong data field name + assert not is_data_content_block({"type": "image", "source": ""}) + assert not is_data_content_block({"type": "video", "data": "video_data"}) + + # Edge cases - empty or missing values + assert not 
is_data_content_block({}) + assert not is_data_content_block({"url": "https://..."}) # missing type def test_convert_to_openai_image_block() -> None: From 0199b56bda27dbbbc0fb2b88988021c5fa9c994c Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Fri, 15 Aug 2025 16:37:39 -0400 Subject: [PATCH 33/73] rfc `test_utils` to make clearer what was existing before and after, and add comments --- .../tests/unit_tests/messages/test_utils.py | 57 ++++++++++--------- 1 file changed, 31 insertions(+), 26 deletions(-) diff --git a/libs/core/tests/unit_tests/messages/test_utils.py b/libs/core/tests/unit_tests/messages/test_utils.py index f9f1c9c9ff081..d655fd13bdbf5 100644 --- a/libs/core/tests/unit_tests/messages/test_utils.py +++ b/libs/core/tests/unit_tests/messages/test_utils.py @@ -1215,36 +1215,22 @@ def test_convert_to_openai_messages_developer() -> None: def test_convert_to_openai_messages_multimodal() -> None: + """v0 and v1 content to OpenAI messages conversion.""" messages = [ HumanMessage( content=[ + # Prior v0 blocks {"type": "text", "text": "Text message"}, { "type": "image", "url": "https://example.com/test.png", }, - { - "type": "image", - "source_type": "url", # backward compatibility - "url": "https://example.com/test.png", - }, - { - "type": "image", - "base64": "", - "mime_type": "image/png", - }, { "type": "image", "source_type": "base64", "data": "", "mime_type": "image/png", }, - { - "type": "file", - "base64": "", - "mime_type": "application/pdf", - "filename": "test.pdf", - }, { "type": "file", "source_type": "base64", @@ -1253,26 +1239,18 @@ def test_convert_to_openai_messages_multimodal() -> None: "filename": "test.pdf", }, { + # OpenAI Chat Completions file format "type": "file", "file": { "filename": "draconomicon.pdf", "file_data": "data:application/pdf;base64,", }, }, - { - "type": "file", - "file_id": "file-abc123", - }, { "type": "file", "source_type": "id", "id": "file-abc123", }, - { - "type": "audio", - "base64": "", - "mime_type": "audio/wav", - }, { "type": "audio", "source_type": "base64", @@ -1286,6 +1264,32 @@ def test_convert_to_openai_messages_multimodal() -> None: "format": "wav", }, }, + # v1 Additions + { + "type": "image", + "source_type": "url", # backward compatibility v0 block field + "url": "https://example.com/test.png", + }, + { + "type": "image", + "base64": "", + "mime_type": "image/png", + }, + { + "type": "file", + "base64": "", + "mime_type": "application/pdf", + "filename": "test.pdf", # backward compatibility v0 block field + }, + { + "type": "file", + "file_id": "file-abc123", + }, + { + "type": "audio", + "base64": "", + "mime_type": "audio/wav", + }, ] ) ] @@ -1294,7 +1298,7 @@ def test_convert_to_openai_messages_multimodal() -> None: message = result[0] assert len(message["content"]) == 13 - # Test adding filename + # Test auto-adding filename messages = [ HumanMessage( content=[ @@ -1313,6 +1317,7 @@ def test_convert_to_openai_messages_multimodal() -> None: assert len(message["content"]) == 1 block = message["content"][0] assert block == { + # OpenAI Chat Completions file format "type": "file", "file": { "file_data": "data:application/pdf;base64,", From 2375c3a4d0402a8f12d9fa7490f3e08ef98c9b68 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Fri, 15 Aug 2025 16:39:36 -0400 Subject: [PATCH 34/73] add note --- libs/core/langchain_core/messages/content_blocks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/libs/core/langchain_core/messages/content_blocks.py b/libs/core/langchain_core/messages/content_blocks.py index 
9a3313488f5bc..a124dd56e69a7 100644 --- a/libs/core/langchain_core/messages/content_blocks.py +++ b/libs/core/langchain_core/messages/content_blocks.py @@ -1047,6 +1047,7 @@ def convert_to_openai_image_block(block: dict[str, Any]) -> dict: def convert_to_openai_data_block(block: dict) -> dict: """Format standard data content block to format expected by OpenAI.""" + # TODO: make sure this supports new v1 if block["type"] == "image": formatted_block = convert_to_openai_image_block(block) From aca7c1fe6a1a55c4134cb31f7046153751869938 Mon Sep 17 00:00:00 2001 From: ccurme Date: Mon, 18 Aug 2025 10:45:06 -0300 Subject: [PATCH 35/73] fix(core): temporarily fix tests (#32589) --- libs/core/langchain_core/messages/__init__.py | 3 +++ libs/core/tests/unit_tests/messages/test_imports.py | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/libs/core/langchain_core/messages/__init__.py b/libs/core/langchain_core/messages/__init__.py index 3787da41e8f70..31e4b560b2b8d 100644 --- a/libs/core/langchain_core/messages/__init__.py +++ b/libs/core/langchain_core/messages/__init__.py @@ -176,6 +176,9 @@ "FunctionMessageChunk": "function", "HumanMessage": "human", "HumanMessageChunk": "human", + "LC_AUTO_PREFIX": "content_blocks", + "LC_ID_PREFIX": "content_blocks", + "_LC_ID_PREFIX": "ai", "NonStandardAnnotation": "content_blocks", "NonStandardContentBlock": "content_blocks", "PlainTextContentBlock": "content_blocks", diff --git a/libs/core/tests/unit_tests/messages/test_imports.py b/libs/core/tests/unit_tests/messages/test_imports.py index 750f2f49f060d..ada1c882a7242 100644 --- a/libs/core/tests/unit_tests/messages/test_imports.py +++ b/libs/core/tests/unit_tests/messages/test_imports.py @@ -25,6 +25,9 @@ "HumanMessageChunk", "ImageContentBlock", "InvalidToolCall", + "_LC_ID_PREFIX", + "LC_AUTO_PREFIX", + "LC_ID_PREFIX", "NonStandardAnnotation", "NonStandardContentBlock", "PlainTextContentBlock", @@ -41,6 +44,7 @@ "ReasoningContentBlock", "RemoveMessage", "convert_to_messages", + "ensure_id", "get_buffer_string", "is_data_content_block", "is_reasoning_block", From aeea0e3ff888a1b087489834fbc8142c3dfe9c52 Mon Sep 17 00:00:00 2001 From: ccurme Date: Mon, 18 Aug 2025 10:49:01 -0300 Subject: [PATCH 36/73] fix(langchain): fix tests on standard content branch (#32590) --- libs/langchain/langchain/agents/output_parsers/tools.py | 7 ++++++- libs/langchain/tests/unit_tests/chat_models/test_base.py | 1 + .../tests/unit_tests/chat_models/test_chat_models.py | 1 + 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/libs/langchain/langchain/agents/output_parsers/tools.py b/libs/langchain/langchain/agents/output_parsers/tools.py index b7ec8a47aa19a..1cf1ed8680f03 100644 --- a/libs/langchain/langchain/agents/output_parsers/tools.py +++ b/libs/langchain/langchain/agents/output_parsers/tools.py @@ -47,7 +47,12 @@ def parse_ai_message_to_tool_action( try: args = json.loads(function["arguments"] or "{}") tool_calls.append( - ToolCall(name=function_name, args=args, id=tool_call["id"]), + ToolCall( + type="tool_call", + name=function_name, + args=args, + id=tool_call["id"], + ), ) except JSONDecodeError as e: msg = ( diff --git a/libs/langchain/tests/unit_tests/chat_models/test_base.py b/libs/langchain/tests/unit_tests/chat_models/test_base.py index 611f251b8162c..bfd9826fb6d63 100644 --- a/libs/langchain/tests/unit_tests/chat_models/test_base.py +++ b/libs/langchain/tests/unit_tests/chat_models/test_base.py @@ -277,6 +277,7 @@ def test_configurable_with_default() -> None: "model_kwargs": {}, "streaming": False, 
"stream_usage": True, + "output_version": "v0", }, "kwargs": { "tools": [{"name": "foo", "description": "foo", "input_schema": {}}], diff --git a/libs/langchain_v1/tests/unit_tests/chat_models/test_chat_models.py b/libs/langchain_v1/tests/unit_tests/chat_models/test_chat_models.py index 450844cbec175..7862ec1d4a8b6 100644 --- a/libs/langchain_v1/tests/unit_tests/chat_models/test_chat_models.py +++ b/libs/langchain_v1/tests/unit_tests/chat_models/test_chat_models.py @@ -277,6 +277,7 @@ def test_configurable_with_default() -> None: "model_kwargs": {}, "streaming": False, "stream_usage": True, + "output_version": "v0", }, "kwargs": { "tools": [{"name": "foo", "description": "foo", "input_schema": {}}], From 4790c7265a205e45d1ac8e9db079c7334511aff8 Mon Sep 17 00:00:00 2001 From: ccurme Date: Mon, 18 Aug 2025 11:30:49 -0300 Subject: [PATCH 37/73] feat(core): lazy-load standard content (#32570) --- libs/core/langchain_core/messages/ai.py | 50 ++- .../messages/block_translators/__init__.py | 1 + .../messages/block_translators/openai.py | 345 ++++++++++++++++++ .../langchain_core/messages/content_blocks.py | 50 +-- libs/core/langchain_core/utils/_merge.py | 16 +- .../messages/block_translators/__init__.py | 0 .../messages/block_translators/test_openai.py | 231 ++++++++++++ .../prompts/__snapshots__/test_chat.ambr | 36 +- .../runnables/__snapshots__/test_graph.ambr | 18 +- .../__snapshots__/test_runnable.ambr | 144 +++++++- .../langchain_openai/chat_models/_compat.py | 276 +------------- .../langchain_openai/chat_models/base.py | 157 +++----- .../chat_models/test_responses_api.py | 62 ++-- .../tests/unit_tests/chat_models/test_base.py | 110 ------ .../chat_models/test_responses_stream.py | 21 +- 15 files changed, 917 insertions(+), 600 deletions(-) create mode 100644 libs/core/langchain_core/messages/block_translators/__init__.py create mode 100644 libs/core/langchain_core/messages/block_translators/openai.py create mode 100644 libs/core/tests/unit_tests/messages/block_translators/__init__.py create mode 100644 libs/core/tests/unit_tests/messages/block_translators/test_openai.py diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index e2e045c71071a..864444f53aeee 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -218,6 +218,16 @@ def lc_attributes(self) -> dict: @property def content_blocks(self) -> list[types.ContentBlock]: """Return content blocks of the message.""" + if self.response_metadata.get("output_version") == "v1": + return cast("list[types.ContentBlock]", self.content) + + model_provider = self.response_metadata.get("model_provider") + if model_provider == "openai": + from langchain_core.messages.block_translators import openai + + return openai.translate_content(self) + + # Otherwise, use best-effort parsing blocks = super().content_blocks if self.tool_calls: @@ -355,32 +365,34 @@ def lc_attributes(self) -> dict: @property def content_blocks(self) -> list[types.ContentBlock]: """Return content blocks of the message.""" + if self.response_metadata.get("output_version") == "v1": + return cast("list[types.ContentBlock]", self.content) + + model_provider = self.response_metadata.get("model_provider") + if model_provider == "openai": + from langchain_core.messages.block_translators import openai + + return openai.translate_content_chunk(self) + + # Otherwise, use best-effort parsing blocks = super().content_blocks - if self.tool_call_chunks: + if self.tool_call_chunks and not self.content: blocks 
= [ block for block in blocks if block["type"] not in ("tool_call", "invalid_tool_call") ] - # Add from tool_call_chunks if missing from content - content_tool_call_ids = { - block.get("id") - for block in self.content - if isinstance(block, dict) and block.get("type") == "tool_call_chunk" - } - for chunk in self.tool_call_chunks: - if (id_ := chunk.get("id")) and id_ not in content_tool_call_ids: - tool_call_chunk_block: types.ToolCallChunk = { - "type": "tool_call_chunk", - "id": id_, - "name": chunk["name"], - "args": chunk["args"], - "index": chunk.get("index"), - } - if "extras" in chunk: - tool_call_chunk_block["extras"] = chunk["extras"] # type: ignore[typeddict-item] - blocks.append(tool_call_chunk_block) + for tool_call_chunk in self.tool_call_chunks: + tc: types.ToolCallChunk = { + "type": "tool_call_chunk", + "id": tool_call_chunk.get("id"), + "name": tool_call_chunk.get("name"), + "args": tool_call_chunk.get("args"), + } + if (idx := tool_call_chunk.get("index")) is not None: + tc["index"] = idx + blocks.append(tc) return blocks diff --git a/libs/core/langchain_core/messages/block_translators/__init__.py b/libs/core/langchain_core/messages/block_translators/__init__.py new file mode 100644 index 0000000000000..1dd51cc836e3a --- /dev/null +++ b/libs/core/langchain_core/messages/block_translators/__init__.py @@ -0,0 +1 @@ +"""Derivations of standard content blocks from provider content.""" diff --git a/libs/core/langchain_core/messages/block_translators/openai.py b/libs/core/langchain_core/messages/block_translators/openai.py new file mode 100644 index 0000000000000..19ab0fbdae712 --- /dev/null +++ b/libs/core/langchain_core/messages/block_translators/openai.py @@ -0,0 +1,345 @@ +"""Derivations of standard content blocks from OpenAI content.""" + +from collections.abc import Iterable +from typing import Any, Optional, Union, cast + +from langchain_core.messages import AIMessage, AIMessageChunk +from langchain_core.messages import content_blocks as types + + +# v1 / Chat Completions +def _convert_to_v1_from_chat_completions( + message: AIMessage, +) -> list[types.ContentBlock]: + """Mutate a Chat Completions message to v1 format.""" + content_blocks: list[types.ContentBlock] = [] + if isinstance(message.content, str): + if message.content: + content_blocks = [{"type": "text", "text": message.content}] + else: + content_blocks = [] + + for tool_call in message.tool_calls: + content_blocks.append(tool_call) + + return content_blocks + + +def _convert_to_v1_from_chat_completions_chunk( + chunk: AIMessageChunk, +) -> list[types.ContentBlock]: + """Mutate a Chat Completions chunk to v1 format.""" + content_blocks: list[types.ContentBlock] = [] + if isinstance(chunk.content, str): + if chunk.content: + content_blocks = [{"type": "text", "text": chunk.content}] + else: + content_blocks = [] + + for tool_call_chunk in chunk.tool_call_chunks: + tc: types.ToolCallChunk = { + "type": "tool_call_chunk", + "id": tool_call_chunk.get("id"), + "name": tool_call_chunk.get("name"), + "args": tool_call_chunk.get("args"), + } + if (idx := tool_call_chunk.get("index")) is not None: + tc["index"] = idx + content_blocks.append(tc) + + return content_blocks + + +def _convert_from_v1_to_chat_completions(message: AIMessage) -> AIMessage: + """Convert a v1 message to the Chat Completions format.""" + if isinstance(message.content, list): + new_content: list = [] + for block in message.content: + if isinstance(block, dict): + block_type = block.get("type") + if block_type == "text": + # Strip annotations + 
new_content.append({"type": "text", "text": block["text"]}) + elif block_type in ("reasoning", "tool_call"): + pass + else: + new_content.append(block) + else: + new_content.append(block) + return message.model_copy(update={"content": new_content}) + + return message + + +# v1 / Responses +def _convert_annotation_to_v1(annotation: dict[str, Any]) -> types.Annotation: + annotation_type = annotation.get("type") + + if annotation_type == "url_citation": + known_fields = { + "type", + "url", + "title", + "cited_text", + "start_index", + "end_index", + } + url_citation = cast("types.Citation", {}) + for field in ("end_index", "start_index", "title"): + if field in annotation: + url_citation[field] = annotation[field] + url_citation["type"] = "citation" + url_citation["url"] = annotation["url"] + for field, value in annotation.items(): + if field not in known_fields: + if "extras" not in url_citation: + url_citation["extras"] = {} + url_citation["extras"][field] = value + return url_citation + + if annotation_type == "file_citation": + known_fields = { + "type", + "title", + "cited_text", + "start_index", + "end_index", + "filename", + } + document_citation: types.Citation = {"type": "citation"} + if "filename" in annotation: + document_citation["title"] = annotation["filename"] + for field, value in annotation.items(): + if field not in known_fields: + if "extras" not in document_citation: + document_citation["extras"] = {} + document_citation["extras"][field] = value + + return document_citation + + # TODO: standardise container_file_citation? + non_standard_annotation: types.NonStandardAnnotation = { + "type": "non_standard_annotation", + "value": annotation, + } + return non_standard_annotation + + +def _explode_reasoning(block: dict[str, Any]) -> Iterable[types.ReasoningContentBlock]: + if "summary" not in block: + yield cast("types.ReasoningContentBlock", block) + return + + known_fields = {"type", "reasoning", "id", "index"} + unknown_fields = [ + field for field in block if field != "summary" and field not in known_fields + ] + if unknown_fields: + block["extras"] = {} + for field in unknown_fields: + block["extras"][field] = block.pop(field) + + if not block["summary"]: + # [{'id': 'rs_...', 'summary': [], 'type': 'reasoning', 'index': 0}] + block = {k: v for k, v in block.items() if k != "summary"} + if "index" in block: + meaningful_idx = f"{block['index']}_0" + block["index"] = f"lc_rs_{meaningful_idx.encode().hex()}" + yield cast("types.ReasoningContentBlock", block) + return + + # Common part for every exploded line, except 'summary' + common = {k: v for k, v in block.items() if k in known_fields} + + # Optional keys that must appear only in the first exploded item + first_only = block.pop("extras", None) + + for idx, part in enumerate(block["summary"]): + new_block = dict(common) + new_block["reasoning"] = part.get("text", "") + if idx == 0 and first_only: + new_block.update(first_only) + if "index" in new_block: + summary_index = part.get("index", 0) + meaningful_idx = f"{new_block['index']}_{summary_index}" + new_block["index"] = f"lc_rs_{meaningful_idx.encode().hex()}" + + yield cast("types.ReasoningContentBlock", new_block) + + +def _convert_to_v1_from_responses(message: AIMessage) -> list[types.ContentBlock]: + """Convert a Responses message to v1 format.""" + + def _iter_blocks() -> Iterable[types.ContentBlock]: + for raw_block in message.content: + if not isinstance(raw_block, dict): + continue + block = raw_block.copy() + block_type = block.get("type") + + if block_type == 
"text": + if "text" not in block: + block["text"] = "" + if "annotations" in block: + block["annotations"] = [ + _convert_annotation_to_v1(a) for a in block["annotations"] + ] + if "index" in block: + block["index"] = f"lc_txt_{block['index']}" + yield cast("types.TextContentBlock", block) + + elif block_type == "reasoning": + yield from _explode_reasoning(block) + + elif block_type == "image_generation_call" and ( + result := block.get("result") + ): + new_block = {"type": "image", "base64": result} + if output_format := block.get("output_format"): + new_block["mime_type"] = f"image/{output_format}" + if "id" in block: + new_block["id"] = block["id"] + if "index" in block: + new_block["index"] = f"lc_img_{block['index']}" + for extra_key in ( + "status", + "background", + "output_format", + "quality", + "revised_prompt", + "size", + ): + if extra_key in block: + if "extras" not in new_block: + new_block["extras"] = {} + new_block["extras"][extra_key] = block[extra_key] + yield cast("types.ImageContentBlock", new_block) + + elif block_type == "function_call": + tool_call_block: Optional[ + Union[types.ToolCall, types.InvalidToolCall, types.ToolCallChunk] + ] = None + call_id = block.get("call_id", "") + if ( + isinstance(message, AIMessageChunk) + and len(message.tool_call_chunks) == 1 + ): + tool_call_block = message.tool_call_chunks[0].copy() # type: ignore[assignment] + elif call_id: + for tool_call in message.tool_calls or []: + if tool_call.get("id") == call_id: + tool_call_block = tool_call.copy() + break + else: + for invalid_tool_call in message.invalid_tool_calls or []: + if invalid_tool_call.get("id") == call_id: + tool_call_block = invalid_tool_call.copy() + break + else: + pass + if tool_call_block: + if "id" in block: + if "extras" not in tool_call_block: + tool_call_block["extras"] = {} + tool_call_block["extras"]["item_id"] = block["id"] + if "index" in block: + tool_call_block["index"] = f"lc_tc_{block['index']}" + yield tool_call_block + + elif block_type == "web_search_call": + web_search_call = {"type": "web_search_call", "id": block["id"]} + if "index" in block: + web_search_call["index"] = f"lc_wsc_{block['index']}" + if ( + "action" in block + and isinstance(block["action"], dict) + and block["action"].get("type") == "search" + and "query" in block["action"] + ): + web_search_call["query"] = block["action"]["query"] + for key in block: + if key not in ("type", "id", "index"): + web_search_call[key] = block[key] + + yield cast("types.WebSearchCall", web_search_call) + + # If .content already has web_search_result, don't add + if not any( + isinstance(other_block, dict) + and other_block.get("type") == "web_search_result" + and other_block.get("id") == block["id"] + for other_block in message.content + ): + web_search_result = {"type": "web_search_result", "id": block["id"]} + if "index" in block and isinstance(block["index"], int): + web_search_result["index"] = f"lc_wsr_{block['index'] + 1}" + yield cast("types.WebSearchResult", web_search_result) + + elif block_type == "code_interpreter_call": + code_interpreter_call = { + "type": "code_interpreter_call", + "id": block["id"], + } + if "code" in block: + code_interpreter_call["code"] = block["code"] + if "index" in block: + code_interpreter_call["index"] = f"lc_cic_{block['index']}" + known_fields = {"type", "id", "language", "code", "extras", "index"} + for key in block: + if key not in known_fields: + if "extras" not in code_interpreter_call: + code_interpreter_call["extras"] = {} + 
code_interpreter_call["extras"][key] = block[key] + + code_interpreter_result = { + "type": "code_interpreter_result", + "id": block["id"], + } + if "outputs" in block: + code_interpreter_result["outputs"] = block["outputs"] + for output in block["outputs"]: + if ( + isinstance(output, dict) + and (output_type := output.get("type")) + and output_type == "logs" + ): + if "output" not in code_interpreter_result: + code_interpreter_result["output"] = [] + code_interpreter_result["output"].append( + { + "type": "code_interpreter_output", + "stdout": output.get("logs", ""), + } + ) + + if "status" in block: + code_interpreter_result["status"] = block["status"] + if "index" in block and isinstance(block["index"], int): + code_interpreter_result["index"] = f"lc_cir_{block['index'] + 1}" + + yield cast("types.CodeInterpreterCall", code_interpreter_call) + yield cast("types.CodeInterpreterResult", code_interpreter_result) + + elif block_type in types.KNOWN_BLOCK_TYPES: + yield cast("types.ContentBlock", block) + else: + new_block = {"type": "non_standard", "value": block} + if "index" in new_block["value"]: + new_block["index"] = f"lc_ns_{new_block['value'].pop('index')}" + yield cast("types.NonStandardContentBlock", new_block) + + return list(_iter_blocks()) + + +def translate_content(message: AIMessage) -> list[types.ContentBlock]: + """Derive standard content blocks from a message with OpenAI content.""" + if isinstance(message.content, str): + return _convert_to_v1_from_chat_completions(message) + return _convert_to_v1_from_responses(message) + + +def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: + """Derive standard content blocks from a message chunk with OpenAI content.""" + if isinstance(message.content, str): + return _convert_to_v1_from_chat_completions_chunk(message) + return _convert_to_v1_from_responses(message) diff --git a/libs/core/langchain_core/messages/content_blocks.py b/libs/core/langchain_core/messages/content_blocks.py index a124dd56e69a7..61d458fe93675 100644 --- a/libs/core/langchain_core/messages/content_blocks.py +++ b/libs/core/langchain_core/messages/content_blocks.py @@ -274,7 +274,7 @@ class TextContentBlock(TypedDict): annotations: NotRequired[list[Annotation]] """``Citation``s and other annotations.""" - index: NotRequired[int] + index: NotRequired[Union[int, str]] """Index of block in aggregate response. Used during streaming.""" extras: NotRequired[dict[str, Any]] @@ -324,7 +324,7 @@ class ToolCall(TypedDict): args: dict[str, Any] """The arguments to the tool call.""" - index: NotRequired[int] + index: NotRequired[Union[int, str]] """Index of block in aggregate response. Used during streaming.""" extras: NotRequired[dict[str, Any]] @@ -371,7 +371,7 @@ class ToolCallChunk(TypedDict): args: Optional[str] """The arguments to the tool call.""" - index: Optional[int] + index: NotRequired[Union[int, str]] """The index of the tool call in a sequence.""" extras: NotRequired[dict[str, Any]] @@ -408,7 +408,7 @@ class InvalidToolCall(TypedDict): error: Optional[str] """An error message associated with the tool call.""" - index: NotRequired[int] + index: NotRequired[Union[int, str]] """Index of block in aggregate response. Used during streaming.""" extras: NotRequired[dict[str, Any]] @@ -433,7 +433,7 @@ class WebSearchCall(TypedDict): query: NotRequired[str] """The search query used in the web search tool call.""" - index: NotRequired[int] + index: NotRequired[Union[int, str]] """Index of block in aggregate response. 
Used during streaming.""" extras: NotRequired[dict[str, Any]] @@ -458,7 +458,7 @@ class WebSearchResult(TypedDict): urls: NotRequired[list[str]] """List of URLs returned by the web search tool call.""" - index: NotRequired[int] + index: NotRequired[Union[int, str]] """Index of block in aggregate response. Used during streaming.""" extras: NotRequired[dict[str, Any]] @@ -486,7 +486,7 @@ class CodeInterpreterCall(TypedDict): code: NotRequired[str] """The code to be executed by the code interpreter.""" - index: NotRequired[int] + index: NotRequired[Union[int, str]] """Index of block in aggregate response. Used during streaming.""" extras: NotRequired[dict[str, Any]] @@ -529,7 +529,7 @@ class CodeInterpreterOutput(TypedDict): file_ids: NotRequired[list[str]] """List of file IDs generated by the code interpreter.""" - index: NotRequired[int] + index: NotRequired[Union[int, str]] """Index of block in aggregate response. Used during streaming.""" extras: NotRequired[dict[str, Any]] @@ -554,7 +554,7 @@ class CodeInterpreterResult(TypedDict): output: list[CodeInterpreterOutput] """List of outputs from the code interpreter tool call.""" - index: NotRequired[int] + index: NotRequired[Union[int, str]] """Index of block in aggregate response. Used during streaming.""" extras: NotRequired[dict[str, Any]] @@ -593,7 +593,7 @@ class ReasoningContentBlock(TypedDict): """ - index: NotRequired[int] + index: NotRequired[Union[int, str]] """Index of block in aggregate response. Used during streaming.""" extras: NotRequired[dict[str, Any]] @@ -637,7 +637,7 @@ class ImageContentBlock(TypedDict): """ - index: NotRequired[int] + index: NotRequired[Union[int, str]] """Index of block in aggregate response. Used during streaming.""" url: NotRequired[str] @@ -684,7 +684,7 @@ class VideoContentBlock(TypedDict): """ - index: NotRequired[int] + index: NotRequired[Union[int, str]] """Index of block in aggregate response. Used during streaming.""" url: NotRequired[str] @@ -730,7 +730,7 @@ class AudioContentBlock(TypedDict): """ - index: NotRequired[int] + index: NotRequired[Union[int, str]] """Index of block in aggregate response. Used during streaming.""" url: NotRequired[str] @@ -777,7 +777,7 @@ class PlainTextContentBlock(TypedDict): mime_type: Literal["text/plain"] """MIME type of the file. Required for base64.""" - index: NotRequired[int] + index: NotRequired[Union[int, str]] """Index of block in aggregate response. Used during streaming.""" url: NotRequired[str] @@ -840,7 +840,7 @@ class FileContentBlock(TypedDict): """ - index: NotRequired[int] + index: NotRequired[Union[int, str]] """Index of block in aggregate response. Used during streaming.""" url: NotRequired[str] @@ -895,7 +895,7 @@ class NonStandardContentBlock(TypedDict): value: dict[str, Any] """Provider-specific data.""" - index: NotRequired[int] + index: NotRequired[Union[int, str]] """Index of block in aggregate response. Used during streaming.""" @@ -1100,7 +1100,7 @@ def create_text_block( *, id: Optional[str] = None, annotations: Optional[list[Annotation]] = None, - index: Optional[int] = None, + index: Optional[Union[int, str]] = None, **kwargs: Any, ) -> TextContentBlock: """Create a ``TextContentBlock``. @@ -1143,7 +1143,7 @@ def create_image_block( file_id: Optional[str] = None, mime_type: Optional[str] = None, id: Optional[str] = None, - index: Optional[int] = None, + index: Optional[Union[int, str]] = None, **kwargs: Any, ) -> ImageContentBlock: """Create an ``ImageContentBlock``. 
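A brief sketch of why ``index`` is widened to ``Union[int, str]`` (illustrative only, not part of the diff; values below are assumed for demonstration): the OpenAI block translator above derives ``'lc_'``-prefixed string indexes from the provider's integer indexes, and the merge utilities patched below treat such indexes as stable identifiers rather than values to concatenate.

    # Assumed integer index as it arrives from the Responses API.
    responses_index = 2
    text_index = f"lc_txt_{responses_index}"   # "lc_txt_2", as produced in translate_content above
    image_index = f"lc_img_{responses_index}"  # "lc_img_2"

    # merge_dicts (see the _merge.py hunk below) skips string concatenation for
    # "lc_"-prefixed indexes, so streamed chunks that share an index collapse into
    # one content block instead of yielding "lc_txt_2lc_txt_2".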
@@ -1199,7 +1199,7 @@ def create_video_block( file_id: Optional[str] = None, mime_type: Optional[str] = None, id: Optional[str] = None, - index: Optional[int] = None, + index: Optional[Union[int, str]] = None, **kwargs: Any, ) -> VideoContentBlock: """Create a ``VideoContentBlock``. @@ -1259,7 +1259,7 @@ def create_audio_block( file_id: Optional[str] = None, mime_type: Optional[str] = None, id: Optional[str] = None, - index: Optional[int] = None, + index: Optional[Union[int, str]] = None, **kwargs: Any, ) -> AudioContentBlock: """Create an ``AudioContentBlock``. @@ -1319,7 +1319,7 @@ def create_file_block( file_id: Optional[str] = None, mime_type: Optional[str] = None, id: Optional[str] = None, - index: Optional[int] = None, + index: Optional[Union[int, str]] = None, **kwargs: Any, ) -> FileContentBlock: """Create a ``FileContentBlock``. @@ -1380,7 +1380,7 @@ def create_plaintext_block( title: Optional[str] = None, context: Optional[str] = None, id: Optional[str] = None, - index: Optional[int] = None, + index: Optional[Union[int, str]] = None, **kwargs: Any, ) -> PlainTextContentBlock: """Create a ``PlainTextContentBlock``. @@ -1436,7 +1436,7 @@ def create_tool_call( args: dict[str, Any], *, id: Optional[str] = None, - index: Optional[int] = None, + index: Optional[Union[int, str]] = None, **kwargs: Any, ) -> ToolCall: """Create a ``ToolCall``. @@ -1475,7 +1475,7 @@ def create_tool_call( def create_reasoning_block( reasoning: Optional[str] = None, id: Optional[str] = None, - index: Optional[int] = None, + index: Optional[Union[int, str]] = None, **kwargs: Any, ) -> ReasoningContentBlock: """Create a ``ReasoningContentBlock``. @@ -1561,7 +1561,7 @@ def create_non_standard_block( value: dict[str, Any], *, id: Optional[str] = None, - index: Optional[int] = None, + index: Optional[Union[int, str]] = None, ) -> NonStandardContentBlock: """Create a ``NonStandardContentBlock``. diff --git a/libs/core/langchain_core/utils/_merge.py b/libs/core/langchain_core/utils/_merge.py index 63d49de953c27..c32b09e2e669c 100644 --- a/libs/core/langchain_core/utils/_merge.py +++ b/libs/core/langchain_core/utils/_merge.py @@ -57,6 +57,11 @@ def merge_dicts(left: dict[str, Any], *others: dict[str, Any]) -> dict[str, Any] # "should either occur once or have the same value across " # "all dicts." 
# ) + if (right_k == "index" and merged[right_k].startswith("lc_")) or ( + right_k in ("id", "output_version", "model_provider") + and merged[right_k] == right_v + ): + continue merged[right_k] += right_v elif isinstance(merged[right_k], dict): merged[right_k] = merge_dicts(merged[right_k], right_v) @@ -93,7 +98,16 @@ def merge_lists(left: Optional[list], *others: Optional[list]) -> Optional[list] merged = other.copy() else: for e in other: - if isinstance(e, dict) and "index" in e and isinstance(e["index"], int): + if ( + isinstance(e, dict) + and "index" in e + and ( + isinstance(e["index"], int) + or ( + isinstance(e["index"], str) and e["index"].startswith("lc_") + ) + ) + ): to_merge = [ i for i, e_left in enumerate(merged) diff --git a/libs/core/tests/unit_tests/messages/block_translators/__init__.py b/libs/core/tests/unit_tests/messages/block_translators/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/libs/core/tests/unit_tests/messages/block_translators/test_openai.py b/libs/core/tests/unit_tests/messages/block_translators/test_openai.py new file mode 100644 index 0000000000000..00dae69865dab --- /dev/null +++ b/libs/core/tests/unit_tests/messages/block_translators/test_openai.py @@ -0,0 +1,231 @@ +from typing import Optional + +from langchain_core.messages import AIMessage, AIMessageChunk +from langchain_core.messages import content_blocks as types + + +def test_convert_to_v1_from_responses() -> None: + message = AIMessage( + [ + {"type": "reasoning", "id": "abc123", "summary": []}, + { + "type": "reasoning", + "id": "abc234", + "summary": [ + {"type": "summary_text", "text": "foo bar"}, + {"type": "summary_text", "text": "baz"}, + ], + }, + { + "type": "function_call", + "call_id": "call_123", + "name": "get_weather", + "arguments": '{"location": "San Francisco"}', + }, + { + "type": "function_call", + "call_id": "call_234", + "name": "get_weather_2", + "arguments": '{"location": "New York"}', + "id": "fc_123", + }, + {"type": "text", "text": "Hello "}, + { + "type": "text", + "text": "world", + "annotations": [ + {"type": "url_citation", "url": "https://example.com"}, + { + "type": "file_citation", + "filename": "my doc", + "index": 1, + "file_id": "file_123", + }, + {"bar": "baz"}, + ], + }, + {"type": "image_generation_call", "id": "ig_123", "result": "..."}, + {"type": "something_else", "foo": "bar"}, + ], + tool_calls=[ + { + "type": "tool_call", + "id": "call_123", + "name": "get_weather", + "args": {"location": "San Francisco"}, + }, + { + "type": "tool_call", + "id": "call_234", + "name": "get_weather_2", + "args": {"location": "New York"}, + }, + ], + response_metadata={"model_provider": "openai"}, + ) + expected_content: list[types.ContentBlock] = [ + {"type": "reasoning", "id": "abc123"}, + {"type": "reasoning", "id": "abc234", "reasoning": "foo bar"}, + {"type": "reasoning", "id": "abc234", "reasoning": "baz"}, + { + "type": "tool_call", + "id": "call_123", + "name": "get_weather", + "args": {"location": "San Francisco"}, + }, + { + "type": "tool_call", + "id": "call_234", + "name": "get_weather_2", + "args": {"location": "New York"}, + "extras": {"item_id": "fc_123"}, + }, + {"type": "text", "text": "Hello "}, + { + "type": "text", + "text": "world", + "annotations": [ + {"type": "citation", "url": "https://example.com"}, + { + "type": "citation", + "title": "my doc", + "extras": {"file_id": "file_123", "index": 1}, + }, + {"type": "non_standard_annotation", "value": {"bar": "baz"}}, + ], + }, + {"type": "image", "base64": "...", 
"id": "ig_123"}, + { + "type": "non_standard", + "value": {"type": "something_else", "foo": "bar"}, + }, + ] + assert message.content_blocks == expected_content + + # Check no mutation + assert message.content != expected_content + + +def test_convert_to_v1_from_responses_chunk() -> None: + chunks = [ + AIMessageChunk( + content=[{"type": "reasoning", "id": "abc123", "summary": [], "index": 0}], + response_metadata={"model_provider": "openai"}, + ), + AIMessageChunk( + content=[ + { + "type": "reasoning", + "id": "abc234", + "summary": [ + {"type": "summary_text", "text": "foo ", "index": 0}, + ], + "index": 1, + } + ], + response_metadata={"model_provider": "openai"}, + ), + AIMessageChunk( + content=[ + { + "type": "reasoning", + "id": "abc234", + "summary": [ + {"type": "summary_text", "text": "bar", "index": 0}, + ], + "index": 1, + } + ], + response_metadata={"model_provider": "openai"}, + ), + AIMessageChunk( + content=[ + { + "type": "reasoning", + "id": "abc234", + "summary": [ + {"type": "summary_text", "text": "baz", "index": 1}, + ], + "index": 1, + } + ], + response_metadata={"model_provider": "openai"}, + ), + ] + expected_chunks = [ + AIMessageChunk( + content=[{"type": "reasoning", "id": "abc123", "index": "lc_rs_305f30"}], + response_metadata={"model_provider": "openai"}, + ), + AIMessageChunk( + content=[ + { + "type": "reasoning", + "id": "abc234", + "reasoning": "foo ", + "index": "lc_rs_315f30", + } + ], + response_metadata={"model_provider": "openai"}, + ), + AIMessageChunk( + content=[ + { + "type": "reasoning", + "id": "abc234", + "reasoning": "bar", + "index": "lc_rs_315f30", + } + ], + response_metadata={"model_provider": "openai"}, + ), + AIMessageChunk( + content=[ + { + "type": "reasoning", + "id": "abc234", + "reasoning": "baz", + "index": "lc_rs_315f31", + } + ], + response_metadata={"model_provider": "openai"}, + ), + ] + for chunk, expected in zip(chunks, expected_chunks): + assert chunk.content_blocks == expected.content_blocks + + full: Optional[AIMessageChunk] = None + for chunk in chunks: + full = chunk if full is None else full + chunk # type: ignore[assignment] + assert isinstance(full, AIMessageChunk) + + expected_content = [ + {"type": "reasoning", "id": "abc123", "summary": [], "index": 0}, + { + "type": "reasoning", + "id": "abc234", + "summary": [ + {"type": "summary_text", "text": "foo bar", "index": 0}, + {"type": "summary_text", "text": "baz", "index": 1}, + ], + "index": 1, + }, + ] + assert full.content == expected_content + + expected_content_blocks = [ + {"type": "reasoning", "id": "abc123", "index": "lc_rs_305f30"}, + { + "type": "reasoning", + "id": "abc234", + "reasoning": "foo bar", + "index": "lc_rs_315f30", + }, + { + "type": "reasoning", + "id": "abc234", + "reasoning": "baz", + "index": "lc_rs_315f31", + }, + ] + assert full.content_blocks == expected_content_blocks diff --git a/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr b/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr index 08a1c528cfb6f..1ff3d7aec133f 100644 --- a/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr +++ b/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr @@ -768,8 +768,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'anyOf': list([ @@ -1036,8 +1043,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 
'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'title': 'Name', @@ -2225,8 +2239,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'anyOf': list([ @@ -2493,8 +2514,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'title': 'Name', diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr index d3a746eaf7966..4f6c54a28d452 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr @@ -1171,8 +1171,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'anyOf': list([ @@ -1439,8 +1446,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'title': 'Name', diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr index db17757a8c95f..7d5642ce853a2 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr @@ -2716,8 +2716,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'anyOf': list([ @@ -2981,8 +2988,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'title': 'Name', @@ -4217,8 +4231,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'anyOf': list([ @@ -4501,8 +4522,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'title': 'Name', @@ -5749,8 +5777,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'anyOf': list([ @@ -6033,8 +6068,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'title': 'Name', @@ -7156,8 +7198,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'anyOf': list([ @@ -7421,8 +7470,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'title': 'Name', @@ -8699,8 +8755,15 @@ 'title': 
'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'anyOf': list([ @@ -8983,8 +9046,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'title': 'Name', @@ -10151,8 +10221,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'anyOf': list([ @@ -10416,8 +10493,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'title': 'Name', @@ -11602,8 +11686,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'anyOf': list([ @@ -11897,8 +11988,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'title': 'Name', @@ -13095,8 +13193,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'anyOf': list([ @@ -13379,8 +13484,15 @@ 'title': 'Id', }), 'index': dict({ + 'anyOf': list([ + dict({ + 'type': 'integer', + }), + dict({ + 'type': 'string', + }), + ]), 'title': 'Index', - 'type': 'integer', }), 'name': dict({ 'title': 'Name', diff --git a/libs/partners/openai/langchain_openai/chat_models/_compat.py b/libs/partners/openai/langchain_openai/chat_models/_compat.py index d47aa735e15b5..967652ae9962d 100644 --- a/libs/partners/openai/langchain_openai/chat_models/_compat.py +++ b/libs/partners/openai/langchain_openai/chat_models/_compat.py @@ -66,9 +66,9 @@ import json from collections.abc import Iterable, Iterator -from typing import Any, Literal, Optional, Union, cast +from typing import Any, Literal, Union, cast -from langchain_core.messages import AIMessage, AIMessageChunk, is_data_content_block +from langchain_core.messages import AIMessage, is_data_content_block from langchain_core.messages import content_blocks as types _FUNCTION_CALL_IDS_MAP_KEY = "__openai_function_call_ids__" @@ -262,46 +262,6 @@ def _convert_from_v03_ai_message(message: AIMessage) -> AIMessage: # v1 / Chat Completions -def _convert_to_v1_from_chat_completions(message: AIMessage) -> AIMessage: - """Mutate a Chat Completions message to v1 format.""" - if isinstance(message.content, str): - if message.content: - message.content = [{"type": "text", "text": message.content}] - else: - message.content = [] - - for tool_call in message.tool_calls: - message.content.append(cast(dict, tool_call)) - - if "tool_calls" in message.additional_kwargs: - _ = message.additional_kwargs.pop("tool_calls") - - if "token_usage" in message.response_metadata: - _ = message.response_metadata.pop("token_usage") - - return message - - -def _convert_to_v1_from_chat_completions_chunk(chunk: AIMessageChunk) -> AIMessageChunk: - """Mutate a Chat Completions chunk to v1 format.""" - if isinstance(chunk.content, str): - if chunk.content: - chunk.content = [{"type": "text", "text": chunk.content}] - else: - 
chunk.content = [] - - for tool_call_chunk in chunk.tool_call_chunks: - chunk.content.append(cast(dict, tool_call_chunk)) - - if "tool_calls" in chunk.additional_kwargs: - _ = chunk.additional_kwargs.pop("tool_calls") - - if "token_usage" in chunk.response_metadata: - _ = chunk.response_metadata.pop("token_usage") - - return chunk - - def _convert_from_v1_to_chat_completions(message: AIMessage) -> AIMessage: """Convert a v1 message to the Chat Completions format.""" if isinstance(message.content, list): @@ -324,230 +284,6 @@ def _convert_from_v1_to_chat_completions(message: AIMessage) -> AIMessage: # v1 / Responses -def _convert_annotation_to_v1(annotation: dict[str, Any]) -> types.Annotation: - annotation_type = annotation.get("type") - - if annotation_type == "url_citation": - known_fields = { - "type", - "url", - "title", - "cited_text", - "start_index", - "end_index", - } - url_citation = cast(types.Citation, {}) - for field in ("end_index", "start_index", "title"): - if field in annotation: - url_citation[field] = annotation[field] - url_citation["type"] = "citation" - url_citation["url"] = annotation["url"] - for field in annotation: - if field not in known_fields: - if "extras" not in url_citation: - url_citation["extras"] = {} - url_citation["extras"][field] = annotation[field] - return url_citation - - elif annotation_type == "file_citation": - known_fields = {"type", "title", "cited_text", "start_index", "end_index"} - document_citation: types.Citation = {"type": "citation"} - if "filename" in annotation: - document_citation["title"] = annotation.pop("filename") - for field in annotation: - if field not in known_fields: - if "extras" not in document_citation: - document_citation["extras"] = {} - document_citation["extras"][field] = annotation[field] - - return document_citation - - # TODO: standardise container_file_citation? 
- else: - non_standard_annotation: types.NonStandardAnnotation = { - "type": "non_standard_annotation", - "value": annotation, - } - return non_standard_annotation - - -def _explode_reasoning(block: dict[str, Any]) -> Iterable[types.ReasoningContentBlock]: - if "summary" not in block: - yield cast(types.ReasoningContentBlock, block) - return - - known_fields = {"type", "reasoning", "id", "index"} - unknown_fields = [ - field for field in block if field != "summary" and field not in known_fields - ] - if unknown_fields: - block["extras"] = {} - for field in unknown_fields: - block["extras"][field] = block.pop(field) - - if not block["summary"]: - _ = block.pop("summary", None) - yield cast(types.ReasoningContentBlock, block) - return - - # Common part for every exploded line, except 'summary' - common = {k: v for k, v in block.items() if k in known_fields} - - # Optional keys that must appear only in the first exploded item - first_only = block.pop("extras", None) - - for idx, part in enumerate(block["summary"]): - new_block = dict(common) - new_block["reasoning"] = part.get("text", "") - if idx == 0 and first_only: - new_block.update(first_only) - yield cast(types.ReasoningContentBlock, new_block) - - -def _convert_to_v1_from_responses( - content: list[dict[str, Any]], - tool_calls: Optional[list[types.ToolCall]] = None, - invalid_tool_calls: Optional[list[types.InvalidToolCall]] = None, -) -> list[types.ContentBlock]: - """Mutate a Responses message to v1 format.""" - - def _iter_blocks() -> Iterable[types.ContentBlock]: - for block in content: - if not isinstance(block, dict): - continue - block_type = block.get("type") - - if block_type == "text": - if "annotations" in block: - block["annotations"] = [ - _convert_annotation_to_v1(a) for a in block["annotations"] - ] - yield cast(types.TextContentBlock, block) - - elif block_type == "reasoning": - yield from _explode_reasoning(block) - - elif block_type == "image_generation_call" and ( - result := block.get("result") - ): - new_block = {"type": "image", "base64": result} - if output_format := block.get("output_format"): - new_block["mime_type"] = f"image/{output_format}" - if "id" in block: - new_block["id"] = block["id"] - if "index" in block: - new_block["index"] = block["index"] - for extra_key in ( - "status", - "background", - "output_format", - "quality", - "revised_prompt", - "size", - ): - if extra_key in block: - if "extras" not in new_block: - new_block["extras"] = {} - new_block["extras"][extra_key] = block[extra_key] - yield cast(types.ImageContentBlock, new_block) - - elif block_type == "function_call": - tool_call_block: Optional[ - Union[types.ToolCall, types.InvalidToolCall] - ] = None - call_id = block.get("call_id", "") - if call_id: - for tool_call in tool_calls or []: - if tool_call.get("id") == call_id: - tool_call_block = cast(types.ToolCall, tool_call.copy()) - break - else: - for invalid_tool_call in invalid_tool_calls or []: - if invalid_tool_call.get("id") == call_id: - tool_call_block = cast( - types.InvalidToolCall, invalid_tool_call.copy() - ) - break - if tool_call_block: - if "id" in block: - if "extras" not in tool_call_block: - tool_call_block["extras"] = {} - tool_call_block["extras"]["item_id"] = block["id"] # type: ignore[typeddict-item] - if "index" in block: - tool_call_block["index"] = block["index"] - yield tool_call_block - - elif block_type == "web_search_call": - web_search_call = {"type": "web_search_call", "id": block["id"]} - if "index" in block: - web_search_call["index"] = block["index"] - 
if ( - "action" in block - and isinstance(block["action"], dict) - and block["action"].get("type") == "search" - and "query" in block["action"] - ): - web_search_call["query"] = block["action"]["query"] - for key in block: - if key not in ("type", "id"): - web_search_call[key] = block[key] - - web_search_result = {"type": "web_search_result", "id": block["id"]} - if "index" in block: - web_search_result["index"] = block["index"] + 1 - yield cast(types.WebSearchCall, web_search_call) - yield cast(types.WebSearchResult, web_search_result) - - elif block_type == "code_interpreter_call": - code_interpreter_call = { - "type": "code_interpreter_call", - "id": block["id"], - } - if "code" in block: - code_interpreter_call["code"] = block["code"] - if "container_id" in block: - code_interpreter_call["container_id"] = block["container_id"] - if "index" in block: - code_interpreter_call["index"] = block["index"] - - code_interpreter_result = { - "type": "code_interpreter_result", - "id": block["id"], - } - if "outputs" in block: - code_interpreter_result["outputs"] = block["outputs"] - for output in block["outputs"]: - if ( - isinstance(output, dict) - and (output_type := output.get("type")) - and output_type == "logs" - ): - if "output" not in code_interpreter_result: - code_interpreter_result["output"] = [] - code_interpreter_result["output"].append( - { - "type": "code_interpreter_output", - "stdout": output.get("logs", ""), - } - ) - - if "status" in block: - code_interpreter_result["status"] = block["status"] - if "index" in block: - code_interpreter_result["index"] = block["index"] + 1 - - yield cast(types.CodeInterpreterCall, code_interpreter_call) - yield cast(types.CodeInterpreterResult, code_interpreter_result) - - else: - new_block = {"type": "non_standard", "value": block} - if "index" in new_block["value"]: - new_block["index"] = new_block["value"].pop("index") - yield cast(types.NonStandardContentBlock, new_block) - - return list(_iter_blocks()) - - def _convert_annotation_from_v1(annotation: types.Annotation) -> dict[str, Any]: if annotation["type"] == "citation": new_ann: dict[str, Any] = {} @@ -678,10 +414,18 @@ def _consolidate_calls( for key in ("code", "container_id"): if key in current: collapsed[key] = current[key] + elif key in current.get("extras", {}): + collapsed[key] = current["extras"][key] + else: + pass for key in ("outputs", "status"): if key in nxt: collapsed[key] = nxt[key] + elif key in nxt.get("extras", {}): + collapsed[key] = nxt["extras"][key] + else: + pass collapsed["type"] = "code_interpreter_call" yield collapsed diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index d8fca0513b17e..caeefdc662aae 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -69,6 +69,10 @@ OutputTokenDetails, UsageMetadata, ) +from langchain_core.messages.block_translators.openai import ( + translate_content, + translate_content_chunk, +) from langchain_core.messages.tool import tool_call_chunk from langchain_core.output_parsers import JsonOutputParser, PydanticOutputParser from langchain_core.output_parsers.openai_tools import ( @@ -111,9 +115,6 @@ _convert_from_v1_to_chat_completions, _convert_from_v1_to_responses, _convert_to_v03_ai_message, - _convert_to_v1_from_chat_completions, - _convert_to_v1_from_chat_completions_chunk, - _convert_to_v1_from_responses, ) if TYPE_CHECKING: @@ -925,9 +926,9 @@ def 
_convert_chunk_to_generation_chunk( generation_info=base_generation_info, ) if self.output_version == "v1": - generation_chunk.message = _convert_to_v1_from_chat_completions_chunk( - cast(AIMessageChunk, generation_chunk.message) - ) + generation_chunk.message.content = [] + generation_chunk.message.response_metadata["output_version"] = "v1" + return generation_chunk choice = choices[0] @@ -940,6 +941,7 @@ def _convert_chunk_to_generation_chunk( generation_info = {**base_generation_info} if base_generation_info else {} if finish_reason := choice.get("finish_reason"): + generation_info["model_provider"] = "openai" generation_info["finish_reason"] = finish_reason if model_name := chunk.get("model"): generation_info["model_name"] = model_name @@ -956,18 +958,11 @@ def _convert_chunk_to_generation_chunk( message_chunk.usage_metadata = usage_metadata if self.output_version == "v1": - message_chunk = cast(AIMessageChunk, message_chunk) - # Convert to v1 format - if isinstance(message_chunk.content, str): - message_chunk = _convert_to_v1_from_chat_completions_chunk( - message_chunk - ) - if message_chunk.content: - message_chunk.content[0]["index"] = 0 # type: ignore[index] - else: - message_chunk = _convert_to_v1_from_chat_completions_chunk( - message_chunk - ) + message_chunk.content = cast( + "Union[str, list[Union[str, dict]]]", + translate_content_chunk(cast(AIMessageChunk, message_chunk)), + ) + message_chunk.response_metadata["output_version"] = "v1" generation_chunk = ChatGenerationChunk( message=message_chunk, generation_info=generation_info or None @@ -1332,24 +1327,14 @@ def _create_chat_result( if hasattr(message, "parsed"): generations[0].message.additional_kwargs["parsed"] = message.parsed if hasattr(message, "refusal"): - if self.output_version in ("v0", "responses/v1"): - generations[0].message.additional_kwargs["refusal"] = ( - message.refusal - ) - elif self.output_version == "v1": - if isinstance(generations[0].message.content, list): - generations[0].message.content.append( - { - "type": "non_standard", - "value": {"refusal": message.refusal}, - } - ) + generations[0].message.additional_kwargs["refusal"] = message.refusal if self.output_version == "v1": - _ = llm_output.pop("token_usage", None) - generations[0].message = _convert_to_v1_from_chat_completions( - cast(AIMessage, generations[0].message) + generations[0].message.content = cast( + Union[str, list[Union[str, dict]]], + translate_content(cast(AIMessage, generations[0].message)), ) + generations[0].message.response_metadata["output_version"] = "v1" return ChatResult(generations=generations, llm_output=llm_output) @@ -4095,29 +4080,6 @@ def _construct_lc_result_from_responses_api( except json.JSONDecodeError: pass - if output_version == "v1": - content_blocks = _convert_to_v1_from_responses(content_blocks) - - if response.tools and any( - tool.type == "image_generation" for tool in response.tools - ): - # Get mime_time from tool definition and add to image generations - # if missing (primarily for tracing purposes). 
- image_generation_call = next( - tool for tool in response.tools if tool.type == "image_generation" - ) - if image_generation_call.output_format: - mime_type = f"image/{image_generation_call.output_format}" - for content_block in content_blocks: - # OK to mutate output message - if ( - isinstance(content_block, dict) - and content_block.get("type") == "image" - and "base64" in content_block - and "mime_type" not in block - ): - block["mime_type"] = mime_type - message = AIMessage( content=content_blocks, id=response.id, @@ -4129,6 +4091,11 @@ def _construct_lc_result_from_responses_api( ) if output_version == "v0": message = _convert_to_v03_ai_message(message) + elif output_version == "v1": + message.content = cast( + Union[str, list[Union[str, dict]]], translate_content(message) + ) + message.response_metadata["output_version"] = "v1" else: pass return ChatResult(generations=[ChatGeneration(message=message)]) @@ -4208,29 +4175,12 @@ def _advance(output_idx: int, sub_idx: Optional[int] = None) -> None: annotation = chunk.annotation else: annotation = chunk.annotation.model_dump(exclude_none=True, mode="json") - if output_version == "v1": - content.append( - { - "type": "text", - "text": "", - "annotations": [annotation], - "index": current_index, - } - ) - else: - content.append({"annotations": [annotation], "index": current_index}) + + content.append( + {"type": "text", "annotations": [annotation], "index": current_index} + ) elif chunk.type == "response.output_text.done": - if output_version == "v1": - content.append( - { - "type": "text", - "text": "", - "id": chunk.item_id, - "index": current_index, - } - ) - else: - content.append({"id": chunk.item_id, "index": current_index}) + content.append({"type": "text", "id": chunk.item_id, "index": current_index}) elif chunk.type == "response.created": id = chunk.response.id response_metadata["id"] = chunk.response.id # Backwards compatibility @@ -4328,30 +4278,18 @@ def _advance(output_idx: int, sub_idx: Optional[int] = None) -> None: reasoning["index"] = current_index content.append(reasoning) elif chunk.type == "response.reasoning_summary_part.added": - if output_version in ("v0", "responses/v1"): - _advance(chunk.output_index) - content.append( - { - # langchain-core uses the `index` key to aggregate text blocks. - "summary": [ - { - "index": chunk.summary_index, - "type": "summary_text", - "text": "", - } - ], - "index": current_index, - "type": "reasoning", - } - ) - else: - # v1 - block: dict = {"type": "reasoning", "reasoning": ""} - if chunk.summary_index > 0: - _advance(chunk.output_index, chunk.summary_index) - block["id"] = chunk.item_id - block["index"] = current_index - content.append(block) + _advance(chunk.output_index) + content.append( + { + # langchain-core uses the `index` key to aggregate text blocks. + "summary": [ + {"index": chunk.summary_index, "type": "summary_text", "text": ""} + ], + "index": current_index, + "type": "reasoning", + "id": chunk.item_id, + } + ) elif chunk.type == "response.image_generation_call.partial_image": # Partial images are not supported yet. 
pass @@ -4373,16 +4311,6 @@ def _advance(output_idx: int, sub_idx: Optional[int] = None) -> None: else: return current_index, current_output_index, current_sub_index, None - if output_version == "v1": - content = cast(list[dict], _convert_to_v1_from_responses(content)) - for content_block in content: - if ( - isinstance(content_block, dict) - and content_block.get("index", -1) > current_index - ): - # blocks were added for v1 - current_index = content_block["index"] - message = AIMessageChunk( content=content, # type: ignore[arg-type] tool_call_chunks=tool_call_chunks, @@ -4396,6 +4324,11 @@ def _advance(output_idx: int, sub_idx: Optional[int] = None) -> None: AIMessageChunk, _convert_to_v03_ai_message(message, has_reasoning=has_reasoning), ) + elif output_version == "v1": + message.content = cast( + Union[str, list[Union[str, dict]]], translate_content_chunk(message) + ) + message.response_metadata["output_version"] = "v1" else: pass return ( diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py index 4ddf373ec78f1..74670bf4e123f 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py @@ -22,7 +22,7 @@ MODEL_NAME = "gpt-4o-mini" -def _check_response(response: Optional[BaseMessage], output_version: str) -> None: +def _check_response(response: Optional[BaseMessage]) -> None: assert isinstance(response, AIMessage) assert isinstance(response.content, list) for block in response.content: @@ -65,7 +65,7 @@ def test_web_search(output_version: Literal["responses/v1", "v1"]) -> None: "What was a positive news story from today?", tools=[{"type": "web_search_preview"}], ) - _check_response(first_response, output_version) + _check_response(first_response) # Test streaming full: Optional[BaseMessageChunk] = None # type: ignore[no-redef] @@ -75,7 +75,7 @@ def test_web_search(output_version: Literal["responses/v1", "v1"]) -> None: ): assert isinstance(chunk, AIMessageChunk) full = chunk if full is None else full + chunk - _check_response(full, output_version) + _check_response(full) # Use OpenAI's stateful API response = llm.invoke( @@ -83,7 +83,7 @@ def test_web_search(output_version: Literal["responses/v1", "v1"]) -> None: tools=[{"type": "web_search_preview"}], previous_response_id=first_response.response_metadata["id"], # type: ignore[typeddict-item] ) - _check_response(response, output_version) + _check_response(response) # Manually pass in chat history response = llm.invoke( @@ -94,13 +94,13 @@ def test_web_search(output_version: Literal["responses/v1", "v1"]) -> None: ], tools=[{"type": "web_search_preview"}], ) - _check_response(response, output_version) + _check_response(response) # Bind tool response = llm.bind_tools([{"type": "web_search_preview"}]).invoke( "What was a positive news story from today?" 
) - _check_response(response, output_version) + _check_response(response) for msg in [first_response, full, response]: assert msg is not None @@ -118,7 +118,7 @@ async def test_web_search_async() -> None: "What was a positive news story from today?", tools=[{"type": "web_search_preview"}], ) - _check_response(response, "v0") + _check_response(response) assert response.response_metadata["status"] # Test streaming @@ -130,7 +130,7 @@ async def test_web_search_async() -> None: assert isinstance(chunk, AIMessageChunk) full = chunk if full is None else full + chunk assert isinstance(full, AIMessageChunk) - _check_response(full, "v0") + _check_response(full) for msg in [response, full]: assert msg.additional_kwargs["tool_outputs"] @@ -163,7 +163,7 @@ def multiply(x: int, y: int) -> int: assert set(full.tool_calls[0]["args"]) == {"x", "y"} response = bound_llm.invoke("What was a positive news story from today?") - _check_response(response, output_version) + _check_response(response) class Foo(BaseModel): @@ -373,14 +373,14 @@ def test_file_search() -> None: input_message = {"role": "user", "content": "What is deep research by OpenAI?"} response = llm.invoke([input_message], tools=[tool]) - _check_response(response, "v0") + _check_response(response) full: Optional[BaseMessageChunk] = None for chunk in llm.stream([input_message], tools=[tool]): assert isinstance(chunk, AIMessageChunk) full = chunk if full is None else full + chunk assert isinstance(full, AIMessageChunk) - _check_response(full, "v0") + _check_response(full) next_message = {"role": "user", "content": "Thank you."} _ = llm.invoke([input_message, full, next_message]) @@ -441,7 +441,7 @@ def test_stream_reasoning_summary( total_reasoning_blocks += 1 assert isinstance(block["id"], str) and block["id"].startswith("rs_") assert isinstance(block["reasoning"], str) - assert isinstance(block["index"], int) + assert isinstance(block["index"], str) assert ( total_reasoning_blocks > 1 ) # This query typically generates multiple reasoning blocks @@ -468,7 +468,7 @@ def test_code_interpreter(output_version: Literal["v0", "responses/v1", "v1"]) - } response = llm_with_tools.invoke([input_message]) assert isinstance(response, AIMessage) - _check_response(response, output_version) + _check_response(response) if output_version == "v0": tool_outputs = [ item @@ -501,7 +501,10 @@ def test_code_interpreter(output_version: Literal["v0", "responses/v1", "v1"]) - # Test streaming # Use same container - container_id = tool_outputs[0]["container_id"] + container_id = ( + tool_outputs[0].get("container_id") + or tool_outputs[0].get("extras")["container_id"] + ) llm_with_tools = llm.bind_tools( [{"type": "code_interpreter", "container": container_id}] ) @@ -746,22 +749,14 @@ def test_image_generation_streaming( assert complete_ai_message.additional_kwargs["tool_outputs"] tool_output = complete_ai_message.additional_kwargs["tool_outputs"][0] assert set(tool_output.keys()).issubset(expected_keys) - elif output_version == "responses/v1": + else: + # "responses/v1" tool_output = next( block for block in complete_ai_message.content if isinstance(block, dict) and block["type"] == "image_generation_call" ) assert set(tool_output.keys()).issubset(expected_keys) - else: - # v1 - standard_keys = {"type", "base64", "id", "status", "index"} - tool_output = next( - block - for block in complete_ai_message.content - if isinstance(block, dict) and block["type"] == "image" - ) - assert set(standard_keys).issubset(tool_output.keys()) 
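# Illustrative sketch, not part of the patch: the version-specific assertions
# removed above are now exercised by the shared translation layer added earlier
# in this patch (translate_content / the content_blocks property). Mirroring the
# unit-test fixtures, an OpenAI Responses image_generation_call is expected to
# surface as a standard image content block:
from langchain_core.messages import AIMessage

msg = AIMessage(
    content=[{"type": "image_generation_call", "id": "ig_123", "result": "..."}],
    response_metadata={"model_provider": "openai"},
)
# Expected (roughly): [{"type": "image", "base64": "...", "id": "ig_123"}]
print(msg.content_blocks)
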
@pytest.mark.default_cassette("test_image_generation_streaming.yaml.gz") @@ -829,7 +824,7 @@ def test_image_generation_multi_turn( ] ai_message = llm_with_tools.invoke(chat_history) assert isinstance(ai_message, AIMessage) - _check_response(ai_message, output_version) + _check_response(ai_message) expected_keys = { "id", @@ -895,26 +890,19 @@ def test_image_generation_multi_turn( ai_message2 = llm_with_tools.invoke(chat_history) assert isinstance(ai_message2, AIMessage) - _check_response(ai_message2, output_version) + _check_response(ai_message2) if output_version == "v0": tool_output = ai_message2.additional_kwargs["tool_outputs"][0] assert set(tool_output.keys()).issubset(expected_keys) - elif output_version == "responses/v1": + else: + # "responses/v1" tool_output = next( block for block in ai_message2.content if isinstance(block, dict) and block["type"] == "image_generation_call" ) assert set(tool_output.keys()).issubset(expected_keys) - else: - standard_keys = {"type", "base64", "id", "status"} - tool_output = next( - block - for block in ai_message2.content - if isinstance(block, dict) and block["type"] == "image" - ) - assert set(standard_keys).issubset(tool_output.keys()) @pytest.mark.default_cassette("test_image_generation_multi_turn.yaml.gz") @@ -938,7 +926,7 @@ def test_image_generation_multi_turn_v1() -> None: ] ai_message = llm_with_tools.invoke(chat_history) assert isinstance(ai_message, AIMessage) - _check_response(ai_message, "v1") + _check_response(ai_message) standard_keys = {"type", "base64", "mime_type", "id"} extra_keys = { @@ -975,7 +963,7 @@ def test_image_generation_multi_turn_v1() -> None: ai_message2 = llm_with_tools.invoke(chat_history) assert isinstance(ai_message2, AIMessage) - _check_response(ai_message2, "v1") + _check_response(ai_message2) tool_output = next( block diff --git a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py index 47695efa37cc5..542b7330378d0 100644 --- a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py +++ b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py @@ -20,7 +20,6 @@ ToolCall, ToolMessage, ) -from langchain_core.messages import content_blocks as types from langchain_core.messages.ai import UsageMetadata from langchain_core.outputs import ChatGeneration, ChatResult from langchain_core.runnables import RunnableLambda @@ -55,7 +54,6 @@ _convert_from_v1_to_chat_completions, _convert_from_v1_to_responses, _convert_to_v03_ai_message, - _convert_to_v1_from_responses, ) from langchain_openai.chat_models.base import ( _construct_lc_result_from_responses_api, @@ -2585,114 +2583,6 @@ def test_convert_from_v1_to_responses( assert message_v1 != result -@pytest.mark.parametrize( - "responses_content, tool_calls, expected_content", - [ - ( - [ - {"type": "reasoning", "id": "abc123", "summary": []}, - { - "type": "reasoning", - "id": "abc234", - "summary": [ - {"type": "summary_text", "text": "foo "}, - {"type": "summary_text", "text": "bar"}, - ], - }, - { - "type": "function_call", - "call_id": "call_123", - "name": "get_weather", - "arguments": '{"location": "San Francisco"}', - }, - { - "type": "function_call", - "call_id": "call_234", - "name": "get_weather_2", - "arguments": '{"location": "New York"}', - "id": "fc_123", - }, - {"type": "text", "text": "Hello "}, - { - "type": "text", - "text": "world", - "annotations": [ - {"type": "url_citation", "url": "https://example.com"}, - { - "type": "file_citation", - "filename": "my doc", - 
"index": 1, - "file_id": "file_123", - }, - {"bar": "baz"}, - ], - }, - {"type": "image_generation_call", "id": "ig_123", "result": "..."}, - {"type": "something_else", "foo": "bar"}, - ], - [ - { - "type": "tool_call", - "id": "call_123", - "name": "get_weather", - "args": {"location": "San Francisco"}, - }, - { - "type": "tool_call", - "id": "call_234", - "name": "get_weather_2", - "args": {"location": "New York"}, - }, - ], - [ - {"type": "reasoning", "id": "abc123"}, - {"type": "reasoning", "id": "abc234", "reasoning": "foo "}, - {"type": "reasoning", "id": "abc234", "reasoning": "bar"}, - { - "type": "tool_call", - "id": "call_123", - "name": "get_weather", - "args": {"location": "San Francisco"}, - }, - { - "type": "tool_call", - "id": "call_234", - "name": "get_weather_2", - "args": {"location": "New York"}, - "extras": {"item_id": "fc_123"}, - }, - {"type": "text", "text": "Hello "}, - { - "type": "text", - "text": "world", - "annotations": [ - {"type": "citation", "url": "https://example.com"}, - { - "type": "citation", - "title": "my doc", - "extras": {"file_id": "file_123", "index": 1}, - }, - {"type": "non_standard_annotation", "value": {"bar": "baz"}}, - ], - }, - {"type": "image", "base64": "...", "id": "ig_123"}, - { - "type": "non_standard", - "value": {"type": "something_else", "foo": "bar"}, - }, - ], - ) - ], -) -def test_convert_to_v1_from_responses( - responses_content: list[dict[str, Any]], - tool_calls: list[ToolCall], - expected_content: list[types.ContentBlock], -) -> None: - result = _convert_to_v1_from_responses(responses_content, tool_calls) - assert result == expected_content - - def test_get_last_messages() -> None: messages: list[BaseMessage] = [HumanMessage("Hello")] last_messages, previous_response_id = _get_last_messages(messages) diff --git a/libs/partners/openai/tests/unit_tests/chat_models/test_responses_stream.py b/libs/partners/openai/tests/unit_tests/chat_models/test_responses_stream.py index 49c88ab4aadeb..8d7c4a14c525e 100644 --- a/libs/partners/openai/tests/unit_tests/chat_models/test_responses_stream.py +++ b/libs/partners/openai/tests/unit_tests/chat_models/test_responses_stream.py @@ -676,19 +676,24 @@ def _strip_none(obj: Any) -> Any: "type": "reasoning", "reasoning": "reasoning block one", "id": "rs_123", - "index": 0, + "index": "lc_rs_305f30", }, { "type": "reasoning", "reasoning": "another reasoning block", "id": "rs_123", - "index": 1, + "index": "lc_rs_305f31", + }, + { + "type": "text", + "text": "text block one", + "index": "lc_txt_1", + "id": "msg_123", }, - {"type": "text", "text": "text block one", "index": 2, "id": "msg_123"}, { "type": "text", "text": "another text block", - "index": 3, + "index": "lc_txt_2", "id": "msg_123", }, { @@ -696,16 +701,16 @@ def _strip_none(obj: Any) -> Any: "reasoning": "more reasoning", "id": "rs_234", "extras": {"encrypted_content": "encrypted-content"}, - "index": 4, + "index": "lc_rs_335f30", }, { "type": "reasoning", "reasoning": "still more reasoning", "id": "rs_234", - "index": 5, + "index": "lc_rs_335f31", }, - {"type": "text", "text": "more", "index": 6, "id": "msg_234"}, - {"type": "text", "text": "text", "index": 7, "id": "msg_234"}, + {"type": "text", "text": "more", "index": "lc_txt_4", "id": "msg_234"}, + {"type": "text", "text": "text", "index": "lc_txt_5", "id": "msg_234"}, ], ), ], From 8ee0cbba3ce391b0d85ae36ba3376536814da857 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Mon, 18 Aug 2025 16:33:12 -0400 Subject: [PATCH 38/73] refactor(core): prefixes (#32597) re: #32589 cc: 
@ccurme - Rename namespace: `messages.content_blocks` -> `messages.content` - Prefixes and ID logic are now in `messages.common` instead of `AIMessage` since the logic is shared between messages and message content. Did this instead of `utils` due to circular import problems that were hairy --- .../language_models/chat_models.py | 14 ++-- libs/core/langchain_core/messages/__init__.py | 67 +++++++++---------- libs/core/langchain_core/messages/ai.py | 18 +++-- libs/core/langchain_core/messages/base.py | 32 ++++++++- .../messages/block_translators/openai.py | 2 +- .../{content_blocks.py => content.py} | 36 ++-------- libs/core/langchain_core/messages/human.py | 2 +- libs/core/langchain_core/messages/system.py | 2 +- libs/core/langchain_core/messages/tool.py | 6 +- libs/core/langchain_core/messages/utils.py | 5 +- libs/core/langchain_core/runnables/base.py | 2 +- .../messages/block_translators/test_openai.py | 2 +- .../core/tests/unit_tests/messages/test_ai.py | 2 +- .../tests/unit_tests/messages/test_imports.py | 1 - libs/core/tests/unit_tests/test_messages.py | 2 +- .../langchain_openai/chat_models/_compat.py | 2 +- 16 files changed, 101 insertions(+), 94 deletions(-) rename libs/core/langchain_core/messages/{content_blocks.py => content.py} (97%) diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py index 572c805c0be0b..8aed5b134b00e 100644 --- a/libs/core/langchain_core/language_models/chat_models.py +++ b/libs/core/langchain_core/language_models/chat_models.py @@ -35,6 +35,7 @@ ) from langchain_core.load import dumpd, dumps from langchain_core.messages import ( + LC_ID_PREFIX, AIMessage, AnyMessage, BaseMessage, @@ -46,7 +47,6 @@ is_data_content_block, message_chunk_to_message, ) -from langchain_core.messages.ai import _LC_ID_PREFIX from langchain_core.outputs import ( ChatGeneration, ChatGenerationChunk, @@ -540,7 +540,7 @@ def stream( try: input_messages = _normalize_messages(messages) - run_id = "-".join((_LC_ID_PREFIX, str(run_manager.run_id))) + run_id = "-".join((LC_ID_PREFIX, str(run_manager.run_id))) for chunk in self._stream(input_messages, stop=stop, **kwargs): if chunk.message.id is None: chunk.message.id = run_id @@ -633,7 +633,7 @@ async def astream( try: input_messages = _normalize_messages(messages) - run_id = "-".join((_LC_ID_PREFIX, str(run_manager.run_id))) + run_id = "-".join((LC_ID_PREFIX, str(run_manager.run_id))) async for chunk in self._astream( input_messages, stop=stop, @@ -1099,7 +1099,7 @@ def _generate_with_cache( chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk) if run_manager: if chunk.message.id is None: - chunk.message.id = f"{_LC_ID_PREFIX}-{run_manager.run_id}" + chunk.message.id = f"{LC_ID_PREFIX}-{run_manager.run_id}" run_manager.on_llm_new_token( cast("str", chunk.message.content), chunk=chunk ) @@ -1115,7 +1115,7 @@ def _generate_with_cache( # Add response metadata to each generation for idx, generation in enumerate(result.generations): if run_manager and generation.message.id is None: - generation.message.id = f"{_LC_ID_PREFIX}-{run_manager.run_id}-{idx}" + generation.message.id = f"{LC_ID_PREFIX}-{run_manager.run_id}-{idx}" generation.message.response_metadata = _gen_info_and_msg_metadata( generation ) @@ -1172,7 +1172,7 @@ async def _agenerate_with_cache( chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk) if run_manager: if chunk.message.id is None: - chunk.message.id = f"{_LC_ID_PREFIX}-{run_manager.run_id}" + chunk.message.id = 
f"{LC_ID_PREFIX}-{run_manager.run_id}" await run_manager.on_llm_new_token( cast("str", chunk.message.content), chunk=chunk ) @@ -1188,7 +1188,7 @@ async def _agenerate_with_cache( # Add response metadata to each generation for idx, generation in enumerate(result.generations): if run_manager and generation.message.id is None: - generation.message.id = f"{_LC_ID_PREFIX}-{run_manager.run_id}-{idx}" + generation.message.id = f"{LC_ID_PREFIX}-{run_manager.run_id}-{idx}" generation.message.response_metadata = _gen_info_and_msg_metadata( generation ) diff --git a/libs/core/langchain_core/messages/__init__.py b/libs/core/langchain_core/messages/__init__.py index 31e4b560b2b8d..f3224bc8e1b0e 100644 --- a/libs/core/langchain_core/messages/__init__.py +++ b/libs/core/langchain_core/messages/__init__.py @@ -21,21 +21,21 @@ if TYPE_CHECKING: from langchain_core.messages.ai import ( - _LC_ID_PREFIX, AIMessage, AIMessageChunk, ) from langchain_core.messages.base import ( + LC_AUTO_PREFIX, + LC_ID_PREFIX, BaseMessage, BaseMessageChunk, + ensure_id, merge_content, message_to_dict, messages_to_dict, ) from langchain_core.messages.chat import ChatMessage, ChatMessageChunk - from langchain_core.messages.content_blocks import ( - LC_AUTO_PREFIX, - LC_ID_PREFIX, + from langchain_core.messages.content import ( Annotation, AudioContentBlock, Citation, @@ -56,7 +56,6 @@ WebSearchResult, convert_to_openai_data_block, convert_to_openai_image_block, - ensure_id, is_data_content_block, is_reasoning_block, is_text_block, @@ -91,7 +90,6 @@ __all__ = ( "LC_AUTO_PREFIX", "LC_ID_PREFIX", - "_LC_ID_PREFIX", "AIMessage", "AIMessageChunk", "Annotation", @@ -153,63 +151,62 @@ ) _dynamic_imports = { - "ensure_id": "content_blocks", + "ensure_id": "base", "AIMessage": "ai", "AIMessageChunk": "ai", - "Annotation": "content_blocks", - "AudioContentBlock": "content_blocks", + "Annotation": "content", + "AudioContentBlock": "content", "BaseMessage": "base", "BaseMessageChunk": "base", "merge_content": "base", "message_to_dict": "base", "messages_to_dict": "base", - "Citation": "content_blocks", - "ContentBlock": "content_blocks", + "Citation": "content", + "ContentBlock": "content", "ChatMessage": "chat", "ChatMessageChunk": "chat", - "CodeInterpreterCall": "content_blocks", - "CodeInterpreterOutput": "content_blocks", - "CodeInterpreterResult": "content_blocks", - "DataContentBlock": "content_blocks", - "FileContentBlock": "content_blocks", + "CodeInterpreterCall": "content", + "CodeInterpreterOutput": "content", + "CodeInterpreterResult": "content", + "DataContentBlock": "content", + "FileContentBlock": "content", "FunctionMessage": "function", "FunctionMessageChunk": "function", "HumanMessage": "human", "HumanMessageChunk": "human", - "LC_AUTO_PREFIX": "content_blocks", - "LC_ID_PREFIX": "content_blocks", - "_LC_ID_PREFIX": "ai", - "NonStandardAnnotation": "content_blocks", - "NonStandardContentBlock": "content_blocks", - "PlainTextContentBlock": "content_blocks", - "ReasoningContentBlock": "content_blocks", + "LC_AUTO_PREFIX": "base", + "LC_ID_PREFIX": "base", + "NonStandardAnnotation": "content", + "NonStandardContentBlock": "content", + "PlainTextContentBlock": "content", + "ReasoningContentBlock": "content", "RemoveMessage": "modifier", "SystemMessage": "system", "SystemMessageChunk": "system", - "WebSearchCall": "content_blocks", - "WebSearchResult": "content_blocks", - "ImageContentBlock": "content_blocks", + "WebSearchCall": "content", + "WebSearchResult": "content", + "ImageContentBlock": "content", "InvalidToolCall": 
"tool", - "TextContentBlock": "content_blocks", + "TextContentBlock": "content", "ToolCall": "tool", "ToolCallChunk": "tool", "ToolMessage": "tool", "ToolMessageChunk": "tool", - "VideoContentBlock": "content_blocks", + "VideoContentBlock": "content", "AnyMessage": "utils", "MessageLikeRepresentation": "utils", "_message_from_dict": "utils", "convert_to_messages": "utils", - "convert_to_openai_data_block": "content_blocks", - "convert_to_openai_image_block": "content_blocks", + "convert_to_openai_data_block": "content", + "convert_to_openai_image_block": "content", "convert_to_openai_messages": "utils", "filter_messages": "utils", "get_buffer_string": "utils", - "is_data_content_block": "content_blocks", - "is_reasoning_block": "content_blocks", - "is_text_block": "content_blocks", - "is_tool_call_block": "content_blocks", - "is_tool_call_chunk": "content_blocks", + "is_data_content_block": "content", + "is_reasoning_block": "content", + "is_text_block": "content", + "is_tool_call_block": "content", + "is_tool_call_chunk": "content", "merge_message_runs": "utils", "message_chunk_to_message": "utils", "messages_from_dict": "utils", diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index 864444f53aeee..3492195b23151 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -8,8 +8,14 @@ from pydantic import model_validator from typing_extensions import NotRequired, Self, TypedDict, override -from langchain_core.messages import content_blocks as types -from langchain_core.messages.base import BaseMessage, BaseMessageChunk, merge_content +from langchain_core.messages import content as types +from langchain_core.messages.base import ( + LC_AUTO_PREFIX, + LC_ID_PREFIX, + BaseMessage, + BaseMessageChunk, + merge_content, +) from langchain_core.messages.tool import ( InvalidToolCall, ToolCall, @@ -26,8 +32,6 @@ logger = logging.getLogger(__name__) -_LC_ID_PREFIX = types.LC_ID_PREFIX - class InputTokenDetails(TypedDict, total=False): """Breakdown of input token counts. 
@@ -525,15 +529,15 @@ def add_ai_message_chunks( for id_ in candidates: if ( id_ - and not id_.startswith(types.LC_ID_PREFIX) - and not id_.startswith(types.LC_AUTO_PREFIX) + and not id_.startswith(LC_ID_PREFIX) + and not id_.startswith(LC_AUTO_PREFIX) ): chunk_id = id_ break else: # second pass: prefer lc_run-* ids over lc_* ids for id_ in candidates: - if id_ and id_.startswith(types.LC_ID_PREFIX): + if id_ and id_.startswith(LC_ID_PREFIX): chunk_id = id_ break else: diff --git a/libs/core/langchain_core/messages/base.py b/libs/core/langchain_core/messages/base.py index 871f687d35cf6..44f7d8ceeff95 100644 --- a/libs/core/langchain_core/messages/base.py +++ b/libs/core/langchain_core/messages/base.py @@ -3,11 +3,11 @@ from __future__ import annotations from typing import TYPE_CHECKING, Any, Optional, Union, cast, overload +from uuid import uuid4 from pydantic import ConfigDict, Field from langchain_core.load.serializable import Serializable -from langchain_core.messages import content_blocks as types from langchain_core.utils import get_bolded_text from langchain_core.utils._merge import merge_dicts, merge_lists from langchain_core.utils.interactive_env import is_interactive_env @@ -15,8 +15,21 @@ if TYPE_CHECKING: from collections.abc import Sequence + from langchain_core.messages import content as types from langchain_core.prompts.chat import ChatPromptTemplate +LC_AUTO_PREFIX = "lc_" +"""LangChain auto-generated ID prefix for messages and content blocks.""" + +LC_ID_PREFIX = f"{LC_AUTO_PREFIX}run-" +"""Internal tracing/callback system identifier. + +Used for: +- Tracing. Every LangChain operation (LLM call, chain execution, tool use, etc.) + gets a unique run_id (UUID) +- Enables tracking parent-child relationships between operations +""" + class BaseMessage(Serializable): """Base abstract message class. @@ -121,6 +134,8 @@ def content_blocks(self) -> list[types.ContentBlock]: Otherwise, does best-effort parsing to standard types. """ + from langchain_core.messages import content as types + blocks: list[types.ContentBlock] = [] content = ( [self.content] @@ -342,3 +357,18 @@ def get_msg_title_repr(title: str, *, bold: bool = False) -> str: if bold: padded = get_bolded_text(padded) return f"{sep}{padded}{second_sep}" + + +def ensure_id(id_val: Optional[str]) -> str: + """Ensure the ID is a valid string, generating a new UUID if not provided. + + Auto-generated UUIDs are prefixed by ``'lc_'`` to indicate they are + LangChain-generated IDs. + + Args: + id_val: Optional string ID value to validate. + + Returns: + A string ID, either the validated provided value or a newly generated UUID4. 
+ """ + return id_val or str(f"{LC_AUTO_PREFIX}{uuid4()}") diff --git a/libs/core/langchain_core/messages/block_translators/openai.py b/libs/core/langchain_core/messages/block_translators/openai.py index 19ab0fbdae712..6f52f643484f0 100644 --- a/libs/core/langchain_core/messages/block_translators/openai.py +++ b/libs/core/langchain_core/messages/block_translators/openai.py @@ -4,7 +4,7 @@ from typing import Any, Optional, Union, cast from langchain_core.messages import AIMessage, AIMessageChunk -from langchain_core.messages import content_blocks as types +from langchain_core.messages import content as types # v1 / Chat Completions diff --git a/libs/core/langchain_core/messages/content_blocks.py b/libs/core/langchain_core/messages/content.py similarity index 97% rename from libs/core/langchain_core/messages/content_blocks.py rename to libs/core/langchain_core/messages/content.py index 61d458fe93675..3e7ae5174b5a7 100644 --- a/libs/core/langchain_core/messages/content_blocks.py +++ b/libs/core/langchain_core/messages/content.py @@ -55,7 +55,7 @@ class TextContentBlock(TypedDict, extra_items=Any): .. code-block:: python - from langchain_core.messages.content_blocks import TextContentBlock + from langchain_core.messages.content import TextContentBlock # Create a text content block with provider-specific fields my_block: TextContentBlock = { @@ -97,7 +97,7 @@ class TextContentBlock(TypedDict, extra_items=Any): .. code-block:: python # Direct construction: - from langchain_core.messages.content_blocks import TextContentBlock, ImageContentBlock + from langchain_core.messages.content import TextContentBlock, ImageContentBlock multimodal_message: AIMessage(content_blocks= [ @@ -111,7 +111,7 @@ class TextContentBlock(TypedDict, extra_items=Any): ) # Using factories: - from langchain_core.messages.content_blocks import create_text_block, create_image_block + from langchain_core.messages.content import create_text_block, create_image_block multimodal_message: AIMessage(content= [ @@ -127,40 +127,14 @@ class TextContentBlock(TypedDict, extra_items=Any): - Automatic ID generation (when not provided) - No need to manually specify the ``type`` field -""" # noqa: E501 +""" import warnings from typing import Any, Literal, Optional, Union, get_args, get_type_hints -from uuid import uuid4 from typing_extensions import NotRequired, TypedDict, TypeGuard -LC_AUTO_PREFIX = "lc_" -"""LangChain auto-generated ID prefix for messages and content blocks.""" - -LC_ID_PREFIX = f"{LC_AUTO_PREFIX}run-" -"""Internal tracing/callback system identifier. - -Used for: -- Tracing. Every LangChain operation (LLM call, chain execution, tool use, etc.) - gets a unique run_id (UUID) -- Enables tracking parent-child relationships between operations -""" - - -def ensure_id(id_val: Optional[str]) -> str: - """Ensure the ID is a valid string, generating a new UUID if not provided. - - Auto-generated UUIDs are prefixed by ``'lc_'`` to indicate they are - LangChain-generated IDs. - - Args: - id_val: Optional string ID value to validate. - - Returns: - A string ID, either the validated provided value or a newly generated UUID4. 
- """ - return id_val or str(f"{LC_AUTO_PREFIX}{uuid4()}") +from langchain_core.messages.base import ensure_id class Citation(TypedDict): diff --git a/libs/core/langchain_core/messages/human.py b/libs/core/langchain_core/messages/human.py index 954f05f037ec2..a15a0f7533d11 100644 --- a/libs/core/langchain_core/messages/human.py +++ b/libs/core/langchain_core/messages/human.py @@ -2,7 +2,7 @@ from typing import Any, Literal, Optional, Union, cast, overload -from langchain_core.messages import content_blocks as types +from langchain_core.messages import content as types from langchain_core.messages.base import BaseMessage, BaseMessageChunk diff --git a/libs/core/langchain_core/messages/system.py b/libs/core/langchain_core/messages/system.py index a3f399b88c142..ca6589db8dbca 100644 --- a/libs/core/langchain_core/messages/system.py +++ b/libs/core/langchain_core/messages/system.py @@ -2,7 +2,7 @@ from typing import Any, Literal, Optional, Union, cast, overload -from langchain_core.messages import content_blocks as types +from langchain_core.messages import content as types from langchain_core.messages.base import BaseMessage, BaseMessageChunk diff --git a/libs/core/langchain_core/messages/tool.py b/libs/core/langchain_core/messages/tool.py index efb714bb58506..fab0315de63ca 100644 --- a/libs/core/langchain_core/messages/tool.py +++ b/libs/core/langchain_core/messages/tool.py @@ -7,10 +7,10 @@ from pydantic import Field, model_validator from typing_extensions import NotRequired, TypedDict, override -from langchain_core.messages import content_blocks as types +from langchain_core.messages import content as types from langchain_core.messages.base import BaseMessage, BaseMessageChunk, merge_content -from langchain_core.messages.content_blocks import InvalidToolCall as InvalidToolCall -from langchain_core.messages.content_blocks import ToolCall as ToolCall +from langchain_core.messages.content import InvalidToolCall as InvalidToolCall +from langchain_core.messages.content import ToolCall as ToolCall from langchain_core.utils._merge import merge_dicts, merge_obj diff --git a/libs/core/langchain_core/messages/utils.py b/libs/core/langchain_core/messages/utils.py index e84dc6c0191ed..79c1c4b66c6c9 100644 --- a/libs/core/langchain_core/messages/utils.py +++ b/libs/core/langchain_core/messages/utils.py @@ -31,10 +31,13 @@ from pydantic import Discriminator, Field, Tag from langchain_core.exceptions import ErrorCode, create_message -from langchain_core.messages import convert_to_openai_data_block, is_data_content_block from langchain_core.messages.ai import AIMessage, AIMessageChunk from langchain_core.messages.base import BaseMessage, BaseMessageChunk from langchain_core.messages.chat import ChatMessage, ChatMessageChunk +from langchain_core.messages.content import ( + convert_to_openai_data_block, + is_data_content_block, +) from langchain_core.messages.function import FunctionMessage, FunctionMessageChunk from langchain_core.messages.human import HumanMessage, HumanMessageChunk from langchain_core.messages.modifier import RemoveMessage diff --git a/libs/core/langchain_core/runnables/base.py b/libs/core/langchain_core/runnables/base.py index 38da82fcda363..c3721f50ecc19 100644 --- a/libs/core/langchain_core/runnables/base.py +++ b/libs/core/langchain_core/runnables/base.py @@ -2399,7 +2399,7 @@ def as_tool( description: The description of the tool. Defaults to None. arg_types: A dictionary of argument names to types. Defaults to None. 
message_version: Version of ``ToolMessage`` to return given - :class:`~langchain_core.messages.content_blocks.ToolCall` input. + :class:`~langchain_core.messages.content.ToolCall` input. Returns: A ``BaseTool`` instance. diff --git a/libs/core/tests/unit_tests/messages/block_translators/test_openai.py b/libs/core/tests/unit_tests/messages/block_translators/test_openai.py index 00dae69865dab..9e2510d56159d 100644 --- a/libs/core/tests/unit_tests/messages/block_translators/test_openai.py +++ b/libs/core/tests/unit_tests/messages/block_translators/test_openai.py @@ -1,7 +1,7 @@ from typing import Optional from langchain_core.messages import AIMessage, AIMessageChunk -from langchain_core.messages import content_blocks as types +from langchain_core.messages import content as types def test_convert_to_v1_from_responses() -> None: diff --git a/libs/core/tests/unit_tests/messages/test_ai.py b/libs/core/tests/unit_tests/messages/test_ai.py index b3c0a4d84ea08..67b0a2dc9680c 100644 --- a/libs/core/tests/unit_tests/messages/test_ai.py +++ b/libs/core/tests/unit_tests/messages/test_ai.py @@ -1,6 +1,6 @@ from langchain_core.load import dumpd, load from langchain_core.messages import AIMessage, AIMessageChunk -from langchain_core.messages import content_blocks as types +from langchain_core.messages import content as types from langchain_core.messages.ai import ( InputTokenDetails, OutputTokenDetails, diff --git a/libs/core/tests/unit_tests/messages/test_imports.py b/libs/core/tests/unit_tests/messages/test_imports.py index ada1c882a7242..bf438b0cd8eac 100644 --- a/libs/core/tests/unit_tests/messages/test_imports.py +++ b/libs/core/tests/unit_tests/messages/test_imports.py @@ -25,7 +25,6 @@ "HumanMessageChunk", "ImageContentBlock", "InvalidToolCall", - "_LC_ID_PREFIX", "LC_AUTO_PREFIX", "LC_ID_PREFIX", "NonStandardAnnotation", diff --git a/libs/core/tests/unit_tests/test_messages.py b/libs/core/tests/unit_tests/test_messages.py index 6fc42db829b36..3de287a287c1e 100644 --- a/libs/core/tests/unit_tests/test_messages.py +++ b/libs/core/tests/unit_tests/test_messages.py @@ -31,7 +31,7 @@ messages_from_dict, messages_to_dict, ) -from langchain_core.messages.content_blocks import KNOWN_BLOCK_TYPES, ContentBlock +from langchain_core.messages.content import KNOWN_BLOCK_TYPES, ContentBlock from langchain_core.messages.tool import invalid_tool_call as create_invalid_tool_call from langchain_core.messages.tool import tool_call as create_tool_call from langchain_core.messages.tool import tool_call_chunk as create_tool_call_chunk diff --git a/libs/partners/openai/langchain_openai/chat_models/_compat.py b/libs/partners/openai/langchain_openai/chat_models/_compat.py index 967652ae9962d..eb9ce3f40d464 100644 --- a/libs/partners/openai/langchain_openai/chat_models/_compat.py +++ b/libs/partners/openai/langchain_openai/chat_models/_compat.py @@ -69,7 +69,7 @@ from typing import Any, Literal, Union, cast from langchain_core.messages import AIMessage, is_data_content_block -from langchain_core.messages import content_blocks as types +from langchain_core.messages import content as types _FUNCTION_CALL_IDS_MAP_KEY = "__openai_function_call_ids__" From 0e6c172893b0daa9085e79a60d3b09350595c091 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Mon, 18 Aug 2025 17:24:57 -0400 Subject: [PATCH 39/73] refactor(core): prefixes, again (#32599) Put in `core.utils` this time to prevent other circular import issues present in the `normalize()` rfc: `base` imports `content` `content` imports `ensure_id()` from `base` --- 
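Note (kept below the `---`, outside the commit message): a minimal sketch of the import shape this patch aims for, assuming only what the diff below shows — the shared ID helpers (`LC_AUTO_PREFIX`, `LC_ID_PREFIX`, `ensure_id`) move into `langchain_core.utils.utils`, which imports neither `messages.base` nor `messages.content`, so `content` no longer has to pull `ensure_id()` from `base` and the `base` -> `content` -> `base` cycle goes away.

    # Sketch of the relocated helpers (mirrors the utils/utils.py hunk below)
    from typing import Optional
    from uuid import uuid4

    LC_AUTO_PREFIX = "lc_"      # prefix for LangChain auto-generated IDs
    LC_ID_PREFIX = "lc_run-"    # prefix for tracing/callback run IDs

    def ensure_id(id_val: Optional[str]) -> str:
        # Keep a caller-provided ID; otherwise mint an "lc_"-prefixed UUID4.
        return id_val or f"{LC_AUTO_PREFIX}{uuid4()}"

    # After this patch, messages/base.py and messages/content.py both import
    # ensure_id from langchain_core.utils.utils instead of from each other.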
libs/core/langchain_core/messages/__init__.py | 7 +---- libs/core/langchain_core/messages/ai.py | 3 +- libs/core/langchain_core/messages/base.py | 28 ------------------ libs/core/langchain_core/messages/content.py | 2 +- libs/core/langchain_core/utils/utils.py | 29 +++++++++++++++++++ 5 files changed, 32 insertions(+), 37 deletions(-) diff --git a/libs/core/langchain_core/messages/__init__.py b/libs/core/langchain_core/messages/__init__.py index f3224bc8e1b0e..dfbf1ff3b7559 100644 --- a/libs/core/langchain_core/messages/__init__.py +++ b/libs/core/langchain_core/messages/__init__.py @@ -18,6 +18,7 @@ from typing import TYPE_CHECKING from langchain_core._import_utils import import_attr +from langchain_core.utils.utils import LC_AUTO_PREFIX, LC_ID_PREFIX, ensure_id if TYPE_CHECKING: from langchain_core.messages.ai import ( @@ -25,11 +26,8 @@ AIMessageChunk, ) from langchain_core.messages.base import ( - LC_AUTO_PREFIX, - LC_ID_PREFIX, BaseMessage, BaseMessageChunk, - ensure_id, merge_content, message_to_dict, messages_to_dict, @@ -151,7 +149,6 @@ ) _dynamic_imports = { - "ensure_id": "base", "AIMessage": "ai", "AIMessageChunk": "ai", "Annotation": "content", @@ -174,8 +171,6 @@ "FunctionMessageChunk": "function", "HumanMessage": "human", "HumanMessageChunk": "human", - "LC_AUTO_PREFIX": "base", - "LC_ID_PREFIX": "base", "NonStandardAnnotation": "content", "NonStandardContentBlock": "content", "PlainTextContentBlock": "content", diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index 3492195b23151..b37b33490f783 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -10,8 +10,6 @@ from langchain_core.messages import content as types from langchain_core.messages.base import ( - LC_AUTO_PREFIX, - LC_ID_PREFIX, BaseMessage, BaseMessageChunk, merge_content, @@ -29,6 +27,7 @@ from langchain_core.utils._merge import merge_dicts, merge_lists from langchain_core.utils.json import parse_partial_json from langchain_core.utils.usage import _dict_int_op +from langchain_core.utils.utils import LC_AUTO_PREFIX, LC_ID_PREFIX logger = logging.getLogger(__name__) diff --git a/libs/core/langchain_core/messages/base.py b/libs/core/langchain_core/messages/base.py index 44f7d8ceeff95..5355b75a71bf6 100644 --- a/libs/core/langchain_core/messages/base.py +++ b/libs/core/langchain_core/messages/base.py @@ -3,7 +3,6 @@ from __future__ import annotations from typing import TYPE_CHECKING, Any, Optional, Union, cast, overload -from uuid import uuid4 from pydantic import ConfigDict, Field @@ -18,18 +17,6 @@ from langchain_core.messages import content as types from langchain_core.prompts.chat import ChatPromptTemplate -LC_AUTO_PREFIX = "lc_" -"""LangChain auto-generated ID prefix for messages and content blocks.""" - -LC_ID_PREFIX = f"{LC_AUTO_PREFIX}run-" -"""Internal tracing/callback system identifier. - -Used for: -- Tracing. Every LangChain operation (LLM call, chain execution, tool use, etc.) - gets a unique run_id (UUID) -- Enables tracking parent-child relationships between operations -""" - class BaseMessage(Serializable): """Base abstract message class. @@ -357,18 +344,3 @@ def get_msg_title_repr(title: str, *, bold: bool = False) -> str: if bold: padded = get_bolded_text(padded) return f"{sep}{padded}{second_sep}" - - -def ensure_id(id_val: Optional[str]) -> str: - """Ensure the ID is a valid string, generating a new UUID if not provided. 
- - Auto-generated UUIDs are prefixed by ``'lc_'`` to indicate they are - LangChain-generated IDs. - - Args: - id_val: Optional string ID value to validate. - - Returns: - A string ID, either the validated provided value or a newly generated UUID4. - """ - return id_val or str(f"{LC_AUTO_PREFIX}{uuid4()}") diff --git a/libs/core/langchain_core/messages/content.py b/libs/core/langchain_core/messages/content.py index 3e7ae5174b5a7..fd46859b9181b 100644 --- a/libs/core/langchain_core/messages/content.py +++ b/libs/core/langchain_core/messages/content.py @@ -134,7 +134,7 @@ class TextContentBlock(TypedDict, extra_items=Any): from typing_extensions import NotRequired, TypedDict, TypeGuard -from langchain_core.messages.base import ensure_id +from langchain_core.utils.utils import ensure_id class Citation(TypedDict): diff --git a/libs/core/langchain_core/utils/utils.py b/libs/core/langchain_core/utils/utils.py index a7467ec51e998..28becc822e2c3 100644 --- a/libs/core/langchain_core/utils/utils.py +++ b/libs/core/langchain_core/utils/utils.py @@ -9,6 +9,7 @@ from collections.abc import Iterator, Sequence from importlib.metadata import version from typing import Any, Callable, Optional, Union, overload +from uuid import uuid4 from packaging.version import parse from pydantic import SecretStr @@ -466,3 +467,31 @@ def get_secret_from_env() -> Optional[SecretStr]: raise ValueError(msg) return get_secret_from_env + + +LC_AUTO_PREFIX = "lc_" +"""LangChain auto-generated ID prefix for messages and content blocks.""" + +LC_ID_PREFIX = "lc_run-" +"""Internal tracing/callback system identifier. + +Used for: +- Tracing. Every LangChain operation (LLM call, chain execution, tool use, etc.) + gets a unique run_id (UUID) +- Enables tracking parent-child relationships between operations +""" + + +def ensure_id(id_val: Optional[str]) -> str: + """Ensure the ID is a valid string, generating a new UUID if not provided. + + Auto-generated UUIDs are prefixed by ``'lc_'`` to indicate they are + LangChain-generated IDs. + + Args: + id_val: Optional string ID value to validate. + + Returns: + A string ID, either the validated provided value or a newly generated UUID4. 
+ """ + return id_val or str(f"{LC_AUTO_PREFIX}{uuid4()}") From 27d81cf3d924c461540437da6debf0850c3bac4a Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Tue, 19 Aug 2025 00:28:35 -0400 Subject: [PATCH 40/73] test(openai): address some type issues in tests (#32601) nits --- .../langchain_openai/chat_models/base.py | 2 +- .../chat_models/test_responses_api.py | 26 ++++++++++--------- .../tests/unit_tests/chat_models/test_base.py | 5 ++-- .../chat_models/test_responses_stream.py | 3 ++- 4 files changed, 20 insertions(+), 16 deletions(-) diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index caeefdc662aae..fc040991797dd 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -1550,7 +1550,7 @@ def get_token_ids(self, text: str) -> list[int]: def get_num_tokens_from_messages( self, - messages: list[BaseMessage], + messages: Sequence[BaseMessage], tools: Optional[ Sequence[Union[dict[str, Any], type, Callable, BaseTool]] ] = None, diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py index 74670bf4e123f..bd9b83752a289 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py @@ -28,8 +28,9 @@ def _check_response(response: Optional[BaseMessage]) -> None: for block in response.content: assert isinstance(block, dict) if block["type"] == "text": - assert isinstance(block["text"], str) # type: ignore[typeddict-item] - for annotation in block["annotations"]: # type: ignore[typeddict-item] + assert isinstance(block.get("text"), str) + annotations = block.get("annotations", []) + for annotation in annotations: if annotation["type"] == "file_citation": assert all( key in annotation @@ -60,7 +61,7 @@ def _check_response(response: Optional[BaseMessage]) -> None: @pytest.mark.vcr @pytest.mark.parametrize("output_version", ["responses/v1", "v1"]) def test_web_search(output_version: Literal["responses/v1", "v1"]) -> None: - llm = ChatOpenAI(model=MODEL_NAME, output_version=output_version) # type: ignore[assignment] + llm = ChatOpenAI(model=MODEL_NAME, output_version=output_version) first_response = llm.invoke( "What was a positive news story from today?", tools=[{"type": "web_search_preview"}], @@ -68,7 +69,7 @@ def test_web_search(output_version: Literal["responses/v1", "v1"]) -> None: _check_response(first_response) # Test streaming - full: Optional[BaseMessageChunk] = None # type: ignore[no-redef] + full: Optional[BaseMessageChunk] = None for chunk in llm.stream( "What was a positive news story from today?", tools=[{"type": "web_search_preview"}], @@ -81,7 +82,7 @@ def test_web_search(output_version: Literal["responses/v1", "v1"]) -> None: response = llm.invoke( "what about a negative one", tools=[{"type": "web_search_preview"}], - previous_response_id=first_response.response_metadata["id"], # type: ignore[typeddict-item] + previous_response_id=first_response.response_metadata["id"], ) _check_response(response) @@ -439,9 +440,11 @@ def test_stream_reasoning_summary( for block in response_1.content_blocks: if block["type"] == "reasoning": total_reasoning_blocks += 1 - assert isinstance(block["id"], str) and block["id"].startswith("rs_") - assert isinstance(block["reasoning"], str) - assert 
isinstance(block["index"], str) + assert isinstance(block.get("id"), str) and block.get( + "id", "" + ).startswith("rs_") + assert isinstance(block.get("reasoning"), str) + assert isinstance(block.get("index"), str) assert ( total_reasoning_blocks > 1 ) # This query typically generates multiple reasoning blocks @@ -501,10 +504,9 @@ def test_code_interpreter(output_version: Literal["v0", "responses/v1", "v1"]) - # Test streaming # Use same container - container_id = ( - tool_outputs[0].get("container_id") - or tool_outputs[0].get("extras")["container_id"] - ) + container_id = tool_outputs[0].get("container_id") or tool_outputs[0].get( + "extras", {} + ).get("container_id") llm_with_tools = llm.bind_tools( [{"type": "code_interpreter", "container": container_id}] ) diff --git a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py index 542b7330378d0..c68c86cef705b 100644 --- a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py +++ b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py @@ -26,7 +26,7 @@ from langchain_core.tracers.base import BaseTracer from langchain_core.tracers.schemas import Run from openai.types.responses import ResponseOutputMessage, ResponseReasoningItem -from openai.types.responses.response import IncompleteDetails, Response, ResponseUsage +from openai.types.responses.response import IncompleteDetails, Response from openai.types.responses.response_error import ResponseError from openai.types.responses.response_file_search_tool_call import ( ResponseFileSearchToolCall, @@ -43,6 +43,7 @@ from openai.types.responses.response_usage import ( InputTokensDetails, OutputTokensDetails, + ResponseUsage, ) from pydantic import BaseModel, Field, SecretStr from typing_extensions import TypedDict @@ -1233,7 +1234,7 @@ def test_structured_outputs_parser() -> None: serialized = dumps(llm_output) deserialized = loads(serialized) assert isinstance(deserialized, ChatGeneration) - result = output_parser.invoke(deserialized.message) + result = output_parser.invoke(cast(AIMessage, deserialized.message)) assert result == parsed_response diff --git a/libs/partners/openai/tests/unit_tests/chat_models/test_responses_stream.py b/libs/partners/openai/tests/unit_tests/chat_models/test_responses_stream.py index 8d7c4a14c525e..fd4e716e882ab 100644 --- a/libs/partners/openai/tests/unit_tests/chat_models/test_responses_stream.py +++ b/libs/partners/openai/tests/unit_tests/chat_models/test_responses_stream.py @@ -21,7 +21,7 @@ ResponseTextDeltaEvent, ResponseTextDoneEvent, ) -from openai.types.responses.response import Response, ResponseUsage +from openai.types.responses.response import Response from openai.types.responses.response_output_text import ResponseOutputText from openai.types.responses.response_reasoning_item import Summary from openai.types.responses.response_reasoning_summary_part_added_event import ( @@ -33,6 +33,7 @@ from openai.types.responses.response_usage import ( InputTokensDetails, OutputTokensDetails, + ResponseUsage, ) from openai.types.shared.reasoning import Reasoning from openai.types.shared.response_format_text import ResponseFormatText From 43b9d3d9041b337ac803daeb6f29b3c08e219d61 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Tue, 19 Aug 2025 10:08:56 -0400 Subject: [PATCH 41/73] feat(core): implement dynamic translator registration for model providers (#32602) Extensible registry system for translating AI message content blocks from various model providers. 
Refactors the way provider-specific content is handled, moving from hardcoded logic to a plugin-like architecture. --- .../language_models/chat_models.py | 2 +- libs/core/langchain_core/messages/ai.py | 16 ++-- .../messages/block_translators/__init__.py | 80 +++++++++++++++++++ .../block_translators/amazon/__init__.py | 1 + .../block_translators/amazon/bedrock.py | 29 +++++++ .../amazon/bedrock_converse.py | 29 +++++++ .../messages/block_translators/anthropic.py | 27 +++++++ .../messages/block_translators/chroma.py | 27 +++++++ .../block_translators/google/__init__.py | 1 + .../block_translators/google/genai.py | 27 +++++++ .../block_translators/google/vertexai.py | 27 +++++++ .../messages/block_translators/groq.py | 27 +++++++ .../messages/block_translators/ollama.py | 27 +++++++ .../messages/block_translators/openai.py | 13 +++ 14 files changed, 326 insertions(+), 7 deletions(-) create mode 100644 libs/core/langchain_core/messages/block_translators/amazon/__init__.py create mode 100644 libs/core/langchain_core/messages/block_translators/amazon/bedrock.py create mode 100644 libs/core/langchain_core/messages/block_translators/amazon/bedrock_converse.py create mode 100644 libs/core/langchain_core/messages/block_translators/anthropic.py create mode 100644 libs/core/langchain_core/messages/block_translators/chroma.py create mode 100644 libs/core/langchain_core/messages/block_translators/google/__init__.py create mode 100644 libs/core/langchain_core/messages/block_translators/google/genai.py create mode 100644 libs/core/langchain_core/messages/block_translators/google/vertexai.py create mode 100644 libs/core/langchain_core/messages/block_translators/groq.py create mode 100644 libs/core/langchain_core/messages/block_translators/ollama.py diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py index 8aed5b134b00e..51f92a04fedc8 100644 --- a/libs/core/langchain_core/language_models/chat_models.py +++ b/libs/core/langchain_core/language_models/chat_models.py @@ -35,7 +35,6 @@ ) from langchain_core.load import dumpd, dumps from langchain_core.messages import ( - LC_ID_PREFIX, AIMessage, AnyMessage, BaseMessage, @@ -66,6 +65,7 @@ convert_to_openai_tool, ) from langchain_core.utils.pydantic import TypeBaseModel, is_basemodel_subclass +from langchain_core.utils.utils import LC_ID_PREFIX if TYPE_CHECKING: import uuid diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index b37b33490f783..83572bd231ea5 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -225,10 +225,12 @@ def content_blocks(self) -> list[types.ContentBlock]: return cast("list[types.ContentBlock]", self.content) model_provider = self.response_metadata.get("model_provider") - if model_provider == "openai": - from langchain_core.messages.block_translators import openai + if model_provider: + from langchain_core.messages.block_translators import get_translator - return openai.translate_content(self) + translator = get_translator(model_provider) + if translator: + return translator["translate_content"](self) # Otherwise, use best-effort parsing blocks = super().content_blocks @@ -372,10 +374,12 @@ def content_blocks(self) -> list[types.ContentBlock]: return cast("list[types.ContentBlock]", self.content) model_provider = self.response_metadata.get("model_provider") - if model_provider == "openai": - from langchain_core.messages.block_translators import openai + if model_provider: 
+ from langchain_core.messages.block_translators import get_translator - return openai.translate_content_chunk(self) + translator = get_translator(model_provider) + if translator: + return translator["translate_content_chunk"](self) # Otherwise, use best-effort parsing blocks = super().content_blocks diff --git a/libs/core/langchain_core/messages/block_translators/__init__.py b/libs/core/langchain_core/messages/block_translators/__init__.py index 1dd51cc836e3a..ff58558713d13 100644 --- a/libs/core/langchain_core/messages/block_translators/__init__.py +++ b/libs/core/langchain_core/messages/block_translators/__init__.py @@ -1 +1,81 @@ """Derivations of standard content blocks from provider content.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Callable + +if TYPE_CHECKING: + from langchain_core.messages import AIMessage, AIMessageChunk + from langchain_core.messages import content as types + +# Provider to translator mapping +PROVIDER_TRANSLATORS: dict[str, dict[str, Callable[..., list[types.ContentBlock]]]] = {} + + +def register_translator( + provider: str, + translate_content: Callable[[AIMessage], list[types.ContentBlock]], + translate_content_chunk: Callable[[AIMessageChunk], list[types.ContentBlock]], +) -> None: + """Register content translators for a provider. + + Args: + provider: The model provider name (e.g. ``'openai'``, ``'anthropic'``). + translate_content: Function to translate ``AIMessage`` content. + translate_content_chunk: Function to translate ``AIMessageChunk`` content. + """ + PROVIDER_TRANSLATORS[provider] = { + "translate_content": translate_content, + "translate_content_chunk": translate_content_chunk, + } + + +def get_translator( + provider: str, +) -> dict[str, Callable[..., list[types.ContentBlock]]] | None: + """Get the translator functions for a provider. + + Args: + provider: The model provider name. + + Returns: + Dictionary with ``'translate_content'`` and ``'translate_content_chunk'`` + functions, or None if no translator is registered for the provider. 
+ """ + return PROVIDER_TRANSLATORS.get(provider) + + +def _auto_register_translators() -> None: + """Automatically register all available block translators.""" + import contextlib + import importlib + import pkgutil + from pathlib import Path + + package_path = Path(__file__).parent + + # Discover all sub-modules + for module_info in pkgutil.iter_modules([str(package_path)]): + module_name = module_info.name + + # Skip the __init__ module and any private modules + if module_name.startswith("_"): + continue + + if module_info.ispkg: + # For subpackages, discover their submodules + subpackage_path = package_path / module_name + for submodule_info in pkgutil.iter_modules([str(subpackage_path)]): + submodule_name = submodule_info.name + if not submodule_name.startswith("_"): + with contextlib.suppress(ImportError, AttributeError): + importlib.import_module( + f".{module_name}.{submodule_name}", package=__name__ + ) + else: + # Import top-level translator modules + with contextlib.suppress(ImportError, AttributeError): + importlib.import_module(f".{module_name}", package=__name__) + + +_auto_register_translators() diff --git a/libs/core/langchain_core/messages/block_translators/amazon/__init__.py b/libs/core/langchain_core/messages/block_translators/amazon/__init__.py new file mode 100644 index 0000000000000..1fbfad4912db7 --- /dev/null +++ b/libs/core/langchain_core/messages/block_translators/amazon/__init__.py @@ -0,0 +1 @@ +"""Derivations of standard content blocks from Amazon content.""" diff --git a/libs/core/langchain_core/messages/block_translators/amazon/bedrock.py b/libs/core/langchain_core/messages/block_translators/amazon/bedrock.py new file mode 100644 index 0000000000000..76467152b1028 --- /dev/null +++ b/libs/core/langchain_core/messages/block_translators/amazon/bedrock.py @@ -0,0 +1,29 @@ +"""Derivations of standard content blocks from Amazon (Bedrock) content.""" + +from langchain_core.messages import AIMessage, AIMessageChunk +from langchain_core.messages import content as types + + +def translate_content(message: AIMessage) -> list[types.ContentBlock]: + """Derive standard content blocks from a message with Bedrock content.""" + raise NotImplementedError + + +def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: + """Derive standard content blocks from a chunk with Bedrock content.""" + raise NotImplementedError + + +def _register_bedrock_translator() -> None: + """Register the Bedrock translator with the central registry. + + Run automatically when the module is imported. 
+ """ + from langchain_core.messages.block_translators import register_translator + + register_translator( + "amazon_bedrock_chat", translate_content, translate_content_chunk + ) + + +_register_bedrock_translator() diff --git a/libs/core/langchain_core/messages/block_translators/amazon/bedrock_converse.py b/libs/core/langchain_core/messages/block_translators/amazon/bedrock_converse.py new file mode 100644 index 0000000000000..5882ef2583bc8 --- /dev/null +++ b/libs/core/langchain_core/messages/block_translators/amazon/bedrock_converse.py @@ -0,0 +1,29 @@ +"""Derivations of standard content blocks from Amazon (Bedrock Converse) content.""" + +from langchain_core.messages import AIMessage, AIMessageChunk +from langchain_core.messages import content as types + + +def translate_content(message: AIMessage) -> list[types.ContentBlock]: + """Derive standard content blocks from a message with Bedrock Converse content.""" + raise NotImplementedError + + +def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: + """Derive standard content blocks from a chunk with Bedrock Converse content.""" + raise NotImplementedError + + +def _register_bedrock_converse_translator() -> None: + """Register the Bedrock Converse translator with the central registry. + + Run automatically when the module is imported. + """ + from langchain_core.messages.block_translators import register_translator + + register_translator( + "amazon_bedrock_converse_chat", translate_content, translate_content_chunk + ) + + +_register_bedrock_converse_translator() diff --git a/libs/core/langchain_core/messages/block_translators/anthropic.py b/libs/core/langchain_core/messages/block_translators/anthropic.py new file mode 100644 index 0000000000000..469b3812a570e --- /dev/null +++ b/libs/core/langchain_core/messages/block_translators/anthropic.py @@ -0,0 +1,27 @@ +"""Derivations of standard content blocks from Anthropic content.""" + +from langchain_core.messages import AIMessage, AIMessageChunk +from langchain_core.messages import content as types + + +def translate_content(message: AIMessage) -> list[types.ContentBlock]: + """Derive standard content blocks from a message with Anthropic content.""" + raise NotImplementedError + + +def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: + """Derive standard content blocks from a message chunk with Anthropic content.""" + raise NotImplementedError + + +def _register_anthropic_translator() -> None: + """Register the Anthropic translator with the central registry. + + Run automatically when the module is imported. 
+ """ + from langchain_core.messages.block_translators import register_translator + + register_translator("anthropic", translate_content, translate_content_chunk) + + +_register_anthropic_translator() diff --git a/libs/core/langchain_core/messages/block_translators/chroma.py b/libs/core/langchain_core/messages/block_translators/chroma.py new file mode 100644 index 0000000000000..652aa8d0e1b0c --- /dev/null +++ b/libs/core/langchain_core/messages/block_translators/chroma.py @@ -0,0 +1,27 @@ +"""Derivations of standard content blocks from Chroma content.""" + +from langchain_core.messages import AIMessage, AIMessageChunk +from langchain_core.messages import content as types + + +def translate_content(message: AIMessage) -> list[types.ContentBlock]: + """Derive standard content blocks from a message with Chroma content.""" + raise NotImplementedError + + +def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: + """Derive standard content blocks from a message chunk with Chroma content.""" + raise NotImplementedError + + +def _register_chroma_translator() -> None: + """Register the Chroma translator with the central registry. + + Run automatically when the module is imported. + """ + from langchain_core.messages.block_translators import register_translator + + register_translator("chroma", translate_content, translate_content_chunk) + + +_register_chroma_translator() diff --git a/libs/core/langchain_core/messages/block_translators/google/__init__.py b/libs/core/langchain_core/messages/block_translators/google/__init__.py new file mode 100644 index 0000000000000..0c3f0698aa2a5 --- /dev/null +++ b/libs/core/langchain_core/messages/block_translators/google/__init__.py @@ -0,0 +1 @@ +"""Derivations of standard content blocks from Google content.""" diff --git a/libs/core/langchain_core/messages/block_translators/google/genai.py b/libs/core/langchain_core/messages/block_translators/google/genai.py new file mode 100644 index 0000000000000..b9761f94bc44a --- /dev/null +++ b/libs/core/langchain_core/messages/block_translators/google/genai.py @@ -0,0 +1,27 @@ +"""Derivations of standard content blocks from Google (GenAI) content.""" + +from langchain_core.messages import AIMessage, AIMessageChunk +from langchain_core.messages import content as types + + +def translate_content(message: AIMessage) -> list[types.ContentBlock]: + """Derive standard content blocks from a message with Google (GenAI) content.""" + raise NotImplementedError + + +def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: + """Derive standard content blocks from a chunk with Google (GenAI) content.""" + raise NotImplementedError + + +def _register_google_genai_translator() -> None: + """Register the Google (GenAI) translator with the central registry. + + Run automatically when the module is imported. 
+ """ + from langchain_core.messages.block_translators import register_translator + + register_translator("google_genai", translate_content, translate_content_chunk) + + +_register_google_genai_translator() diff --git a/libs/core/langchain_core/messages/block_translators/google/vertexai.py b/libs/core/langchain_core/messages/block_translators/google/vertexai.py new file mode 100644 index 0000000000000..ae51fd4065d89 --- /dev/null +++ b/libs/core/langchain_core/messages/block_translators/google/vertexai.py @@ -0,0 +1,27 @@ +"""Derivations of standard content blocks from Google (VertexAI) content.""" + +from langchain_core.messages import AIMessage, AIMessageChunk +from langchain_core.messages import content as types + + +def translate_content(message: AIMessage) -> list[types.ContentBlock]: + """Derive standard content blocks from a message with Google (VertexAI) content.""" + raise NotImplementedError + + +def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: + """Derive standard content blocks from a chunk with Google (VertexAI) content.""" + raise NotImplementedError + + +def _register_google_vertexai_translator() -> None: + """Register the Google (VertexAI) translator with the central registry. + + Run automatically when the module is imported. + """ + from langchain_core.messages.block_translators import register_translator + + register_translator("google_vertexai", translate_content, translate_content_chunk) + + +_register_google_vertexai_translator() diff --git a/libs/core/langchain_core/messages/block_translators/groq.py b/libs/core/langchain_core/messages/block_translators/groq.py new file mode 100644 index 0000000000000..4b01dfb017f2f --- /dev/null +++ b/libs/core/langchain_core/messages/block_translators/groq.py @@ -0,0 +1,27 @@ +"""Derivations of standard content blocks from Groq content.""" + +from langchain_core.messages import AIMessage, AIMessageChunk +from langchain_core.messages import content as types + + +def translate_content(message: AIMessage) -> list[types.ContentBlock]: + """Derive standard content blocks from a message with Groq content.""" + raise NotImplementedError + + +def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: + """Derive standard content blocks from a message chunk with Groq content.""" + raise NotImplementedError + + +def _register_groq_translator() -> None: + """Register the Groq translator with the central registry. + + Run automatically when the module is imported. 
+ """ + from langchain_core.messages.block_translators import register_translator + + register_translator("groq", translate_content, translate_content_chunk) + + +_register_groq_translator() diff --git a/libs/core/langchain_core/messages/block_translators/ollama.py b/libs/core/langchain_core/messages/block_translators/ollama.py new file mode 100644 index 0000000000000..a0f41ab76342d --- /dev/null +++ b/libs/core/langchain_core/messages/block_translators/ollama.py @@ -0,0 +1,27 @@ +"""Derivations of standard content blocks from Ollama content.""" + +from langchain_core.messages import AIMessage, AIMessageChunk +from langchain_core.messages import content as types + + +def translate_content(message: AIMessage) -> list[types.ContentBlock]: + """Derive standard content blocks from a message with Ollama content.""" + raise NotImplementedError + + +def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: + """Derive standard content blocks from a message chunk with Ollama content.""" + raise NotImplementedError + + +def _register_ollama_translator() -> None: + """Register the Ollama translator with the central registry. + + Run automatically when the module is imported. + """ + from langchain_core.messages.block_translators import register_translator + + register_translator("ollama", translate_content, translate_content_chunk) + + +_register_ollama_translator() diff --git a/libs/core/langchain_core/messages/block_translators/openai.py b/libs/core/langchain_core/messages/block_translators/openai.py index 6f52f643484f0..16f81e6502595 100644 --- a/libs/core/langchain_core/messages/block_translators/openai.py +++ b/libs/core/langchain_core/messages/block_translators/openai.py @@ -343,3 +343,16 @@ def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock] if isinstance(message.content, str): return _convert_to_v1_from_chat_completions_chunk(message) return _convert_to_v1_from_responses(message) + + +def _register_openai_translator() -> None: + """Register the OpenAI translator with the central registry. + + Run automatically when the module is imported. 
+ """ + from langchain_core.messages.block_translators import register_translator + + register_translator("openai", translate_content, translate_content_chunk) + + +_register_openai_translator() From 0444e260bedc44b289bf16832e3f877517f582ad Mon Sep 17 00:00:00 2001 From: ccurme Date: Tue, 19 Aug 2025 13:25:44 -0300 Subject: [PATCH 42/73] refactor: convert message content inside `BaseChatModel` (#32606) --- .../langchain_core/language_models/_utils.py | 17 ++- .../language_models/chat_models.py | 62 +++++++-- .../language_models/chat_models/test_base.py | 130 +++++++++++++++++- .../__snapshots__/test_runnable.ambr | 28 ++-- .../langchain_openai/chat_models/base.py | 40 +----- 5 files changed, 216 insertions(+), 61 deletions(-) diff --git a/libs/core/langchain_core/language_models/_utils.py b/libs/core/langchain_core/language_models/_utils.py index 883f8c855eab2..19dcd8699485b 100644 --- a/libs/core/langchain_core/language_models/_utils.py +++ b/libs/core/langchain_core/language_models/_utils.py @@ -1,6 +1,6 @@ import re from collections.abc import Sequence -from typing import Optional +from typing import Optional, TypeVar from langchain_core.messages import BaseMessage @@ -138,3 +138,18 @@ def _normalize_messages(messages: Sequence[BaseMessage]) -> list[BaseMessage]: formatted_messages.append(formatted_message) return formatted_messages + + +T = TypeVar("T", bound=BaseMessage) + + +def _update_message_content_to_blocks(message: T, output_version: str) -> T: + return message.model_copy( + update={ + "content": message.content_blocks, + "response_metadata": { + **message.response_metadata, + "output_version": output_version, + }, + } + ) diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py index 51f92a04fedc8..fe52f37f1937a 100644 --- a/libs/core/langchain_core/language_models/chat_models.py +++ b/libs/core/langchain_core/language_models/chat_models.py @@ -27,7 +27,10 @@ Callbacks, ) from langchain_core.globals import get_llm_cache -from langchain_core.language_models._utils import _normalize_messages +from langchain_core.language_models._utils import ( + _normalize_messages, + _update_message_content_to_blocks, +) from langchain_core.language_models.base import ( BaseLanguageModel, LangSmithParams, @@ -65,7 +68,7 @@ convert_to_openai_tool, ) from langchain_core.utils.pydantic import TypeBaseModel, is_basemodel_subclass -from langchain_core.utils.utils import LC_ID_PREFIX +from langchain_core.utils.utils import LC_ID_PREFIX, from_env if TYPE_CHECKING: import uuid @@ -334,16 +337,23 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC): """ - output_version: str = "v0" - """Version of ``AIMessage`` output format to use. + output_version: str = Field( + default_factory=from_env("LC_OUTPUT_VERSION", default="v0") + ) + """Version of ``AIMessage`` output format to store in message content. + + ``AIMessage.content_blocks`` will lazily parse the contents of ``content`` into a + standard format. This flag can be used to additionally store the standard format + in message content, e.g., for serialization purposes. - This field is used to roll-out new output formats for chat model ``AIMessage``s - in a backwards-compatible way. + Supported values: - ``'v1'`` standardizes output format using a list of typed ContentBlock dicts. We - recommend this for new applications. 
+ - ``"v0"``: provider-specific format in content (can lazily-parse with + ``.content_blocks``) + - ``"v1"``: standardized format in content (consistent with ``.content_blocks``) - All chat models currently support the default of ``'v0'``. + Partner packages (e.g., ``langchain-openai``) can also use this field to roll out + new content formats in a backward-compatible way. .. versionadded:: 1.0 @@ -545,6 +555,11 @@ def stream( if chunk.message.id is None: chunk.message.id = run_id chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk) + if self.output_version == "v1": + # Overwrite .content with .content_blocks + chunk.message = _update_message_content_to_blocks( + chunk.message, "v1" + ) run_manager.on_llm_new_token( cast("str", chunk.message.content), chunk=chunk ) @@ -642,6 +657,11 @@ async def astream( if chunk.message.id is None: chunk.message.id = run_id chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk) + if self.output_version == "v1": + # Overwrite .content with .content_blocks + chunk.message = _update_message_content_to_blocks( + chunk.message, "v1" + ) await run_manager.on_llm_new_token( cast("str", chunk.message.content), chunk=chunk ) @@ -1100,6 +1120,11 @@ def _generate_with_cache( if run_manager: if chunk.message.id is None: chunk.message.id = f"{LC_ID_PREFIX}-{run_manager.run_id}" + if self.output_version == "v1": + # Overwrite .content with .content_blocks + chunk.message = _update_message_content_to_blocks( + chunk.message, "v1" + ) run_manager.on_llm_new_token( cast("str", chunk.message.content), chunk=chunk ) @@ -1112,6 +1137,13 @@ def _generate_with_cache( else: result = self._generate(messages, stop=stop, **kwargs) + if self.output_version == "v1": + # Overwrite .content with .content_blocks + for generation in result.generations: + generation.message = _update_message_content_to_blocks( + generation.message, "v1" + ) + # Add response metadata to each generation for idx, generation in enumerate(result.generations): if run_manager and generation.message.id is None: @@ -1173,6 +1205,11 @@ async def _agenerate_with_cache( if run_manager: if chunk.message.id is None: chunk.message.id = f"{LC_ID_PREFIX}-{run_manager.run_id}" + if self.output_version == "v1": + # Overwrite .content with .content_blocks + chunk.message = _update_message_content_to_blocks( + chunk.message, "v1" + ) await run_manager.on_llm_new_token( cast("str", chunk.message.content), chunk=chunk ) @@ -1185,6 +1222,13 @@ async def _agenerate_with_cache( else: result = await self._agenerate(messages, stop=stop, **kwargs) + if self.output_version == "v1": + # Overwrite .content with .content_blocks + for generation in result.generations: + generation.message = _update_message_content_to_blocks( + generation.message, "v1" + ) + # Add response metadata to each generation for idx, generation in enumerate(result.generations): if run_manager and generation.message.id is None: diff --git a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py index 37b05ed825566..c21e5ba86b154 100644 --- a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py +++ b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py @@ -14,11 +14,15 @@ ParrotFakeChatModel, ) from langchain_core.language_models._utils import _normalize_messages -from langchain_core.language_models.fake_chat_models import FakeListChatModelError +from langchain_core.language_models.fake_chat_models import ( + 
FakeListChatModelError, + GenericFakeChatModel, +) from langchain_core.messages import ( AIMessage, AIMessageChunk, BaseMessage, + BaseMessageChunk, HumanMessage, SystemMessage, ) @@ -654,3 +658,127 @@ def test_normalize_messages_edge_cases() -> None: ) ] assert messages == _normalize_messages(messages) + + +def test_output_version_invoke(monkeypatch: Any) -> None: + messages = [AIMessage("hello")] + + llm = GenericFakeChatModel(messages=iter(messages), output_version="v1") + response = llm.invoke("hello") + assert response.content == [{"type": "text", "text": "hello"}] + assert response.response_metadata["output_version"] == "v1" + + llm = GenericFakeChatModel(messages=iter(messages)) + response = llm.invoke("hello") + assert response.content == "hello" + + monkeypatch.setenv("LC_OUTPUT_VERSION", "v1") + llm = GenericFakeChatModel(messages=iter(messages)) + response = llm.invoke("hello") + assert response.content == [{"type": "text", "text": "hello"}] + assert response.response_metadata["output_version"] == "v1" + + +async def test_output_version_ainvoke(monkeypatch: Any) -> None: + messages = [AIMessage("hello")] + + llm = GenericFakeChatModel(messages=iter(messages), output_version="v1") + response = await llm.ainvoke("hello") + assert response.content == [{"type": "text", "text": "hello"}] + assert response.response_metadata["output_version"] == "v1" + + llm = GenericFakeChatModel(messages=iter(messages)) + response = await llm.ainvoke("hello") + assert response.content == "hello" + + monkeypatch.setenv("LC_OUTPUT_VERSION", "v1") + llm = GenericFakeChatModel(messages=iter(messages)) + response = await llm.ainvoke("hello") + assert response.content == [{"type": "text", "text": "hello"}] + assert response.response_metadata["output_version"] == "v1" + + +def test_output_version_stream(monkeypatch: Any) -> None: + messages = [AIMessage("foo bar")] + + llm = GenericFakeChatModel(messages=iter(messages), output_version="v1") + full: Optional[BaseMessageChunk] = None + for chunk in llm.stream("hello"): + assert isinstance(chunk, AIMessageChunk) + assert isinstance(chunk.content, list) + assert len(chunk.content) == 1 + block = chunk.content[0] + assert isinstance(block, dict) + assert block["type"] == "text" + assert block["text"] + full = chunk if full is None else full + chunk + assert isinstance(full, AIMessageChunk) + assert full.response_metadata["output_version"] == "v1" + + llm = GenericFakeChatModel(messages=iter(messages)) + full = None + for chunk in llm.stream("hello"): + assert isinstance(chunk, AIMessageChunk) + assert isinstance(chunk.content, str) + assert chunk.content + full = chunk if full is None else full + chunk + assert isinstance(full, AIMessageChunk) + assert full.content == "foo bar" + + monkeypatch.setenv("LC_OUTPUT_VERSION", "v1") + llm = GenericFakeChatModel(messages=iter(messages)) + full = None + for chunk in llm.stream("hello"): + assert isinstance(chunk, AIMessageChunk) + assert isinstance(chunk.content, list) + assert len(chunk.content) == 1 + block = chunk.content[0] + assert isinstance(block, dict) + assert block["type"] == "text" + assert block["text"] + full = chunk if full is None else full + chunk + assert isinstance(full, AIMessageChunk) + assert full.response_metadata["output_version"] == "v1" + + +async def test_output_version_astream(monkeypatch: Any) -> None: + messages = [AIMessage("foo bar")] + + llm = GenericFakeChatModel(messages=iter(messages), output_version="v1") + full: Optional[BaseMessageChunk] = None + async for chunk in 
llm.astream("hello"): + assert isinstance(chunk, AIMessageChunk) + assert isinstance(chunk.content, list) + assert len(chunk.content) == 1 + block = chunk.content[0] + assert isinstance(block, dict) + assert block["type"] == "text" + assert block["text"] + full = chunk if full is None else full + chunk + assert isinstance(full, AIMessageChunk) + assert full.response_metadata["output_version"] == "v1" + + llm = GenericFakeChatModel(messages=iter(messages)) + full = None + async for chunk in llm.astream("hello"): + assert isinstance(chunk, AIMessageChunk) + assert isinstance(chunk.content, str) + assert chunk.content + full = chunk if full is None else full + chunk + assert isinstance(full, AIMessageChunk) + assert full.content == "foo bar" + + monkeypatch.setenv("LC_OUTPUT_VERSION", "v1") + llm = GenericFakeChatModel(messages=iter(messages)) + full = None + async for chunk in llm.astream("hello"): + assert isinstance(chunk, AIMessageChunk) + assert isinstance(chunk.content, list) + assert len(chunk.content) == 1 + block = chunk.content[0] + assert isinstance(block, dict) + assert block["type"] == "text" + assert block["text"] + full = chunk if full is None else full + chunk + assert isinstance(full, AIMessageChunk) + assert full.response_metadata["output_version"] == "v1" diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr index 7d5642ce853a2..04bab565b38b3 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr @@ -97,7 +97,7 @@ "fake_chat_models", "FakeListChatModel" ], - "repr": "FakeListChatModel(responses=['foo, bar'])", + "repr": "FakeListChatModel(output_version='v0', responses=['foo, bar'])", "name": "FakeListChatModel" } ], @@ -227,7 +227,7 @@ "fake_chat_models", "FakeListChatModel" ], - "repr": "FakeListChatModel(responses=['baz, qux'])", + "repr": "FakeListChatModel(output_version='v0', responses=['baz, qux'])", "name": "FakeListChatModel" } ], @@ -346,7 +346,7 @@ "fake_chat_models", "FakeListChatModel" ], - "repr": "FakeListChatModel(responses=['foo, bar'])", + "repr": "FakeListChatModel(output_version='v0', responses=['foo, bar'])", "name": "FakeListChatModel" }, { @@ -457,7 +457,7 @@ "fake_chat_models", "FakeListChatModel" ], - "repr": "FakeListChatModel(responses=['baz, qux'])", + "repr": "FakeListChatModel(output_version='v0', responses=['baz, qux'])", "name": "FakeListChatModel" } ], @@ -1009,7 +1009,7 @@ # name: test_prompt_with_chat_model ''' ChatPromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='You are a nice assistant.'), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, template='{question}'), additional_kwargs={})]) - | FakeListChatModel(responses=['foo']) + | FakeListChatModel(output_version='v0', responses=['foo']) ''' # --- # name: test_prompt_with_chat_model.1 @@ -1109,7 +1109,7 @@ "fake_chat_models", "FakeListChatModel" ], - "repr": "FakeListChatModel(responses=['foo'])", + "repr": "FakeListChatModel(output_version='v0', responses=['foo'])", "name": "FakeListChatModel" } }, @@ -1220,7 +1220,7 @@ "fake_chat_models", "FakeListChatModel" ], - "repr": "FakeListChatModel(responses=['foo, bar'])", + "repr": 
"FakeListChatModel(output_version='v0', responses=['foo, bar'])", "name": "FakeListChatModel" } ], @@ -1249,7 +1249,7 @@ # name: test_prompt_with_chat_model_async ''' ChatPromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='You are a nice assistant.'), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, template='{question}'), additional_kwargs={})]) - | FakeListChatModel(responses=['foo']) + | FakeListChatModel(output_version='v0', responses=['foo']) ''' # --- # name: test_prompt_with_chat_model_async.1 @@ -1349,7 +1349,7 @@ "fake_chat_models", "FakeListChatModel" ], - "repr": "FakeListChatModel(responses=['foo'])", + "repr": "FakeListChatModel(output_version='v0', responses=['foo'])", "name": "FakeListChatModel" } }, @@ -13863,7 +13863,7 @@ just_to_test_lambda: RunnableLambda(...) } | ChatPromptTemplate(input_variables=['documents', 'question'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='You are a nice assistant.'), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['documents', 'question'], input_types={}, partial_variables={}, template='Context:\n{documents}\n\nQuestion:\n{question}'), additional_kwargs={})]) - | FakeListChatModel(responses=['foo, bar']) + | FakeListChatModel(output_version='v0', responses=['foo, bar']) | CommaSeparatedListOutputParser() ''' # --- @@ -14066,7 +14066,7 @@ "fake_chat_models", "FakeListChatModel" ], - "repr": "FakeListChatModel(responses=['foo, bar'])", + "repr": "FakeListChatModel(output_version='v0', responses=['foo, bar'])", "name": "FakeListChatModel" } ], @@ -14092,7 +14092,7 @@ ChatPromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='You are a nice assistant.'), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, template='{question}'), additional_kwargs={})]) | RunnableLambda(...) 
| { - chat: FakeListChatModel(responses=["i'm a chatbot"]), + chat: FakeListChatModel(output_version='v0', responses=["i'm a chatbot"]), llm: FakeListLLM(responses=["i'm a textbot"]) } ''' @@ -14218,7 +14218,7 @@ "fake_chat_models", "FakeListChatModel" ], - "repr": "FakeListChatModel(responses=[\"i'm a chatbot\"])", + "repr": "FakeListChatModel(output_version='v0', responses=[\"i'm a chatbot\"])", "name": "FakeListChatModel" }, "llm": { @@ -14373,7 +14373,7 @@ "fake_chat_models", "FakeListChatModel" ], - "repr": "FakeListChatModel(responses=[\"i'm a chatbot\"])", + "repr": "FakeListChatModel(output_version='v0', responses=[\"i'm a chatbot\"])", "name": "FakeListChatModel" }, "kwargs": { diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index fc040991797dd..ce3f01b12decd 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -69,10 +69,6 @@ OutputTokenDetails, UsageMetadata, ) -from langchain_core.messages.block_translators.openai import ( - translate_content, - translate_content_chunk, -) from langchain_core.messages.tool import tool_call_chunk from langchain_core.output_parsers import JsonOutputParser, PydanticOutputParser from langchain_core.output_parsers.openai_tools import ( @@ -719,12 +715,9 @@ class BaseChatOpenAI(BaseChatModel): - ``'v0'``: AIMessage format as of langchain-openai 0.3.x. - ``'responses/v1'``: Formats Responses API output - items into AIMessage content blocks. + items into AIMessage content blocks (Responses API only) - ``"v1"``: v1 of LangChain cross-provider standard. - Currently only impacts the Responses API. ``output_version='v1'`` is - recommended. - .. versionadded:: 0.3.25 """ @@ -957,13 +950,6 @@ def _convert_chunk_to_generation_chunk( if usage_metadata and isinstance(message_chunk, AIMessageChunk): message_chunk.usage_metadata = usage_metadata - if self.output_version == "v1": - message_chunk.content = cast( - "Union[str, list[Union[str, dict]]]", - translate_content_chunk(cast(AIMessageChunk, message_chunk)), - ) - message_chunk.response_metadata["output_version"] = "v1" - generation_chunk = ChatGenerationChunk( message=message_chunk, generation_info=generation_info or None ) @@ -1329,13 +1315,6 @@ def _create_chat_result( if hasattr(message, "refusal"): generations[0].message.additional_kwargs["refusal"] = message.refusal - if self.output_version == "v1": - generations[0].message.content = cast( - Union[str, list[Union[str, dict]]], - translate_content(cast(AIMessage, generations[0].message)), - ) - generations[0].message.response_metadata["output_version"] = "v1" - return ChatResult(generations=generations, llm_output=llm_output) async def _astream( @@ -4091,13 +4070,7 @@ def _construct_lc_result_from_responses_api( ) if output_version == "v0": message = _convert_to_v03_ai_message(message) - elif output_version == "v1": - message.content = cast( - Union[str, list[Union[str, dict]]], translate_content(message) - ) - message.response_metadata["output_version"] = "v1" - else: - pass + return ChatResult(generations=[ChatGeneration(message=message)]) @@ -4163,6 +4136,7 @@ def _advance(output_idx: int, sub_idx: Optional[int] = None) -> None: response_metadata = metadata else: response_metadata = {} + response_metadata["model_provider"] = "openai" usage_metadata = None id = None if chunk.type == "response.output_text.delta": @@ -4324,13 +4298,7 @@ def _advance(output_idx: int, sub_idx: Optional[int] 
= None) -> None: AIMessageChunk, _convert_to_v03_ai_message(message, has_reasoning=has_reasoning), ) - elif output_version == "v1": - message.content = cast( - Union[str, list[Union[str, dict]]], translate_content_chunk(message) - ) - message.response_metadata["output_version"] = "v1" - else: - pass + return ( current_index, current_output_index, From 5bcf7d006f8b63f87f3cd0a521b5910d4a484750 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Thu, 21 Aug 2025 14:48:23 -0400 Subject: [PATCH 43/73] refactor(core): data block handling, normalize message formats, strip IDs from messages (#32572) > [!WARNING] > **BREAKING:** Simplifies message normalization to single consistent path, requiring partner package updates **Key Changes:** - Consistent multimodal handling: - OpenAI `image_url` blocks pass through unchanged (broad compatibility) - OpenAI `input_audio` and `file` blocks convert to v1 standard equivalents - Legacy v0 multimodal blocks convert to v1 standard - Everything else passes through unchanged - Partner packages must update content block parsing logic **Partner Updates** `output_version` affects how messages are serialized into `.content`. `_normalize_messages()` will now upgrade v0 content to v1, so, all partners now receive v1 format input regardless of `output_version`. Migration: - Partner packages must update to handle v1 input content blocks - `output_version` still controls serialization format of responses (unchanged) --------- Co-authored-by: Chester Curme --- .../langchain_core/language_models/_utils.py | 288 ++++++++--- libs/core/langchain_core/messages/base.py | 45 +- .../messages/block_translators/langchain.py | 304 +++++++++++ .../messages/block_translators/openai.py | 66 ++- libs/core/langchain_core/messages/content.py | 54 +- .../language_models/chat_models/test_base.py | 477 ++++++++++++------ .../chat_models/test_rate_limiting.py | 8 +- .../messages/block_translators/test_openai.py | 32 ++ 8 files changed, 1005 insertions(+), 269 deletions(-) create mode 100644 libs/core/langchain_core/messages/block_translators/langchain.py diff --git a/libs/core/langchain_core/language_models/_utils.py b/libs/core/langchain_core/language_models/_utils.py index 19dcd8699485b..94680674e3a64 100644 --- a/libs/core/langchain_core/language_models/_utils.py +++ b/libs/core/langchain_core/language_models/_utils.py @@ -1,12 +1,30 @@ import re from collections.abc import Sequence -from typing import Optional, TypeVar +from typing import ( + TYPE_CHECKING, + Literal, + Optional, + TypedDict, + TypeVar, + Union, +) -from langchain_core.messages import BaseMessage +if TYPE_CHECKING: + from langchain_core.messages import BaseMessage +from langchain_core.messages.content import ( + ContentBlock, +) def _is_openai_data_block(block: dict) -> bool: - """Check if the block contains multimodal data in OpenAI Chat Completions format.""" + """Check if the block contains multimodal data in OpenAI Chat Completions format. + + Supports both data and ID-style blocks (e.g. ``'file_data'`` and ``'file_id'``) + + If additional keys are present, they are ignored / will not affect outcome as long + as the required keys are present and valid. 
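+
+    A minimal sketch of blocks this predicate accepts and rejects (the URL and
+    base64 payload are illustrative placeholders):
+
+    .. code-block:: python
+
+        _is_openai_data_block(
+            {"type": "image_url", "image_url": {"url": "https://example.com/a.png"}}
+        )  # True
+        _is_openai_data_block(
+            {"type": "input_audio", "input_audio": {"data": "<base64>", "format": "wav"}}
+        )  # True
+        _is_openai_data_block({"type": "text", "text": "Hello"})  # False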
+ + """ if block.get("type") == "image_url": if ( (set(block.keys()) <= {"type", "image_url", "detail"}) @@ -15,29 +33,43 @@ def _is_openai_data_block(block: dict) -> bool: ): url = image_url.get("url") if isinstance(url, str): + # Required per OpenAI spec + return True + # Ignore `'detail'` since it's optional and specific to OpenAI + + elif block.get("type") == "input_audio": + if (audio := block.get("input_audio")) and isinstance(audio, dict): + audio_data = audio.get("data") + audio_format = audio.get("format") + # Both required per OpenAI spec + if isinstance(audio_data, str) and isinstance(audio_format, str): return True elif block.get("type") == "file": if (file := block.get("file")) and isinstance(file, dict): file_data = file.get("file_data") - if isinstance(file_data, str): - return True - - elif block.get("type") == "input_audio": - if (input_audio := block.get("input_audio")) and isinstance(input_audio, dict): - audio_data = input_audio.get("data") - audio_format = input_audio.get("format") - if isinstance(audio_data, str) and isinstance(audio_format, str): + file_id = file.get("file_id") + # Files can be either base64-encoded or pre-uploaded with an ID + if isinstance(file_data, str) or isinstance(file_id, str): return True else: return False + # Has no `'type'` key return False -def _parse_data_uri(uri: str) -> Optional[dict]: - """Parse a data URI into its components. If parsing fails, return None. +class ParsedDataUri(TypedDict): + source_type: Literal["base64"] + data: str + mime_type: str + + +def _parse_data_uri(uri: str) -> Optional[ParsedDataUri]: + """Parse a data URI into its components. + + If parsing fails, return None. If either MIME type or data is missing, return None. Example: @@ -57,90 +89,208 @@ def _parse_data_uri(uri: str) -> Optional[dict]: match = re.match(regex, uri) if match is None: return None + + mime_type = match.group("mime_type") + data = match.group("data") + if not mime_type or not data: + return None + return { "source_type": "base64", - "data": match.group("data"), - "mime_type": match.group("mime_type"), + "data": data, + "mime_type": mime_type, } -def _convert_openai_format_to_data_block(block: dict) -> dict: - """Convert OpenAI image content block to standard data content block. +def _normalize_messages( + messages: Sequence["BaseMessage"], +) -> list["BaseMessage"]: + """Normalize message formats to LangChain v1 standard content blocks. - If parsing fails, pass-through. + Chat models already implement support for: + - Images in OpenAI Chat Completions format + These will be passed through unchanged + - LangChain v1 standard content blocks - Args: - block: The OpenAI image content block to convert. + This function extends support to: + - `Audio `__ and + `file `__ data in OpenAI + Chat Completions format + - Images are technically supported but we expect chat models to handle them + directly; this may change in the future + - LangChain v0 standard content blocks for backward compatibility - Returns: - The converted standard data content block. 
- """ - if block["type"] == "image_url": - parsed = _parse_data_uri(block["image_url"]["url"]) - if parsed is not None: - parsed["type"] = "image" - return parsed - return block - - if block["type"] == "file": - parsed = _parse_data_uri(block["file"]["file_data"]) - if parsed is not None: - parsed["type"] = "file" - if filename := block["file"].get("filename"): - parsed["filename"] = filename - return parsed - return block - - if block["type"] == "input_audio": - data = block["input_audio"].get("data") - audio_format = block["input_audio"].get("format") - if data and audio_format: - return { - "type": "audio", - "source_type": "base64", - "data": data, - "mime_type": f"audio/{audio_format}", + .. versionchanged:: 1.0.0 + In previous versions, this function returned messages in LangChain v0 format. + Now, it returns messages in LangChain v1 format, which upgraded chat models now + expect to receive when passing back in message history. For backward + compatibility, this function will convert v0 message content to v1 format. + + .. dropdown:: v0 Content Block Schemas + + ``URLContentBlock``: + + .. codeblock:: + + { + mime_type: NotRequired[str] + type: Literal['image', 'audio', 'file'], + source_type: Literal['url'], + url: str, } - return block - return block + ``Base64ContentBlock``: + + .. codeblock:: + + { + mime_type: NotRequired[str] + type: Literal['image', 'audio', 'file'], + source_type: Literal['base64'], + data: str, + } + ``IDContentBlock``: -def _normalize_messages(messages: Sequence[BaseMessage]) -> list[BaseMessage]: - """Extend support for message formats. + (In practice, this was never used) + + .. codeblock:: + + { + type: Literal['image', 'audio', 'file'], + source_type: Literal['id'], + id: str, + } + + ``PlainTextContentBlock``: + + .. codeblock:: + + { + mime_type: NotRequired[str] + type: Literal['file'], + source_type: Literal['text'], + url: str, + } + + If a v1 message is passed in, it will be returned as-is, meaning it is safe to + always pass in v1 messages to this function for assurance. + + For posterity, here are the OpenAI Chat Completions schemas we expect: + + Chat Completions image. Can be URL-based or base64-encoded. Supports MIME types + png, jpeg/jpg, webp, static gif: + { + "type": Literal['image_url'], + "image_url": { + "url": Union["data:$MIME_TYPE;base64,$BASE64_ENCODED_IMAGE", "$IMAGE_URL"], + "detail": Literal['low', 'high', 'auto'] = 'auto', # Supported by OpenAI + } + } + + Chat Completions audio: + { + "type": Literal['input_audio'], + "input_audio": { + "format": Literal['wav', 'mp3'], + "data": str = "$BASE64_ENCODED_AUDIO", + }, + } + + Chat Completions files: either base64 or pre-uploaded file ID + { + "type": Literal['file'], + "file": Union[ + { + "filename": Optional[str] = "$FILENAME", + "file_data": str = "$BASE64_ENCODED_FILE", + }, + { + "file_id": str = "$FILE_ID", # For pre-uploaded files to OpenAI + }, + ], + } - Chat models implement support for images in OpenAI Chat Completions format, as well - as other multimodal data as standard data blocks. This function extends support to - audio and file data in OpenAI Chat Completions format by converting them to standard - data blocks. """ + from langchain_core.messages.block_translators.langchain import ( + _convert_legacy_v0_content_block_to_v1, + _convert_openai_format_to_data_block, + ) + formatted_messages = [] for message in messages: + # We preserve input messages - the caller may reuse them elsewhere and expects + # them to remain unchanged. 
We only create a copy if we need to translate. formatted_message = message + if isinstance(message.content, list): for idx, block in enumerate(message.content): + # OpenAI Chat Completions multimodal data blocks to v1 standard if ( isinstance(block, dict) - # Subset to (PDF) files and audio, as most relevant chat models - # support images in OAI format (and some may not yet support the - # standard data block format) - and block.get("type") in {"file", "input_audio"} + and block.get("type") in {"input_audio", "file"} + # Discriminate between OpenAI/LC format since they share `'type'` and _is_openai_data_block(block) ): - if formatted_message is message: - formatted_message = message.model_copy() - # Also shallow-copy content - formatted_message.content = list(formatted_message.content) - - formatted_message.content[idx] = ( # type: ignore[index] # mypy confused by .model_copy - _convert_openai_format_to_data_block(block) - ) + formatted_message = _ensure_message_copy(message, formatted_message) + + converted_block = _convert_openai_format_to_data_block(block) + _update_content_block(formatted_message, idx, converted_block) + + # Convert multimodal LangChain v0 to v1 standard content blocks + elif ( + isinstance(block, dict) + and block.get("type") + in { + "image", + "audio", + "file", + } + and block.get("source_type") # v1 doesn't have `source_type` + in { + "url", + "base64", + "id", + "text", + } + ): + formatted_message = _ensure_message_copy(message, formatted_message) + + converted_block = _convert_legacy_v0_content_block_to_v1(block) + _update_content_block(formatted_message, idx, converted_block) + continue + + # else, pass through blocks that look like they have v1 format unchanged + formatted_messages.append(formatted_message) return formatted_messages -T = TypeVar("T", bound=BaseMessage) +T = TypeVar("T", bound="BaseMessage") + + +def _ensure_message_copy(message: T, formatted_message: T) -> T: + """Create a copy of the message if it hasn't been copied yet.""" + if formatted_message is message: + formatted_message = message.model_copy() + # Shallow-copy content list to allow modifications + formatted_message.content = list(formatted_message.content) + return formatted_message + + +def _update_content_block( + formatted_message: "BaseMessage", idx: int, new_block: Union[ContentBlock, dict] +) -> None: + """Update a content block at the given index, handling type issues.""" + # Type ignore needed because: + # - `BaseMessage.content` is typed as `Union[str, list[Union[str, dict]]]` + # - When content is str, indexing fails (index error) + # - When content is list, the items are `Union[str, dict]` but we're assigning + # `Union[ContentBlock, dict]` where ContentBlock is richer than dict + # - This is safe because we only call this when we've verified content is a list and + # we're doing content block conversions + formatted_message.content[idx] = new_block # type: ignore[index, assignment] def _update_message_content_to_blocks(message: T, output_version: str) -> T: diff --git a/libs/core/langchain_core/messages/base.py b/libs/core/langchain_core/messages/base.py index 5355b75a71bf6..3452740b46ef2 100644 --- a/libs/core/langchain_core/messages/base.py +++ b/libs/core/langchain_core/messages/base.py @@ -7,6 +7,13 @@ from pydantic import ConfigDict, Field from langchain_core.load.serializable import Serializable +from langchain_core.messages.block_translators.langchain import ( + _convert_legacy_v0_content_block_to_v1, + _convert_v0_multimodal_input_to_v1, +) +from 
langchain_core.messages.block_translators.openai import ( + _convert_to_v1_from_chat_completions_input, +) from langchain_core.utils import get_bolded_text from langchain_core.utils._merge import merge_dicts, merge_lists from langchain_core.utils.interactive_env import is_interactive_env @@ -124,6 +131,8 @@ def content_blocks(self) -> list[types.ContentBlock]: from langchain_core.messages import content as types blocks: list[types.ContentBlock] = [] + + # First pass: convert to standard blocks content = ( [self.content] if isinstance(self.content, str) and self.content @@ -134,18 +143,26 @@ def content_blocks(self) -> list[types.ContentBlock]: blocks.append({"type": "text", "text": item}) elif isinstance(item, dict): item_type = item.get("type") - if item_type not in types.KNOWN_BLOCK_TYPES: - msg = ( - f"Non-standard content block type '{item_type}'. Ensure " - "the model supports `output_version='v1'` or higher and " - "that this attribute is set on initialization." + # Try to convert potential v0 format first + converted_block = _convert_legacy_v0_content_block_to_v1(item) + if converted_block is not item: # Conversion happened + blocks.append(cast("types.ContentBlock", converted_block)) + elif item_type is None or item_type not in types.KNOWN_BLOCK_TYPES: + blocks.append( + cast( + "types.ContentBlock", + {"type": "non_standard", "value": item}, + ) ) - raise ValueError(msg) - blocks.append(cast("types.ContentBlock", item)) - else: - pass + else: + blocks.append(cast("types.ContentBlock", item)) + + # Subsequent passes: attempt to unpack non-standard blocks + blocks = _convert_v0_multimodal_input_to_v1(blocks) + # blocks = _convert_to_v1_from_anthropic_input(blocks) + # ... - return blocks + return _convert_to_v1_from_chat_completions_input(blocks) def text(self) -> str: """Get the text content of the message. @@ -211,7 +228,9 @@ def merge_content( Returns: The merged content. """ - merged = first_content + merged: Union[str, list[Union[str, dict]]] + merged = "" if first_content is None else first_content + for content in contents: # If current is a string if isinstance(merged, str): @@ -232,8 +251,8 @@ def merge_content( # If second content is an empty string, treat as a no-op elif content == "": pass - else: - # Otherwise, add the second content as a new element of the list + # Otherwise, add the second content as a new element of the list + elif merged: merged.append(content) return merged diff --git a/libs/core/langchain_core/messages/block_translators/langchain.py b/libs/core/langchain_core/messages/block_translators/langchain.py new file mode 100644 index 0000000000000..4b5e4479835a3 --- /dev/null +++ b/libs/core/langchain_core/messages/block_translators/langchain.py @@ -0,0 +1,304 @@ +"""Derivations of standard content blocks from LangChain content.""" + +from typing import Any, Union, cast + +from langchain_core.language_models._utils import _parse_data_uri +from langchain_core.messages import content as types + + +def _convert_v0_multimodal_input_to_v1( + blocks: list[types.ContentBlock], +) -> list[types.ContentBlock]: + """Convert v0 multimodal blocks to v1 format. + + Processes non_standard blocks that might be v0 format and converts them + to proper v1 ContentBlocks. + + Args: + blocks: List of content blocks to process. + + Returns: + Updated list with v0 blocks converted to v1 format. 
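+
+    A small illustrative example (the URL is a placeholder); the wrapped v0 block
+    is unpacked and converted to a v1 ``ImageContentBlock``:
+
+    .. code-block:: python
+
+        _convert_v0_multimodal_input_to_v1(
+            [
+                {
+                    "type": "non_standard",
+                    "value": {
+                        "type": "image",
+                        "source_type": "url",
+                        "url": "https://example.com/image.png",
+                    },
+                }
+            ]
+        )
+        # -> [{"type": "image", "url": "https://example.com/image.png"}]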
+ """ + converted_blocks = [] + for block in blocks: + if ( + isinstance(block, dict) + and block.get("type") == "non_standard" + and "value" in block + and isinstance(block["value"], dict) # type: ignore[typeddict-item] + ): + # We know this is a NonStandardContentBlock, so we can safely access value + value = cast("Any", block)["value"] + # Check if this looks like v0 format + if ( + value.get("type") in {"image", "audio", "file"} + and "source_type" in value + ): + converted_block = _convert_legacy_v0_content_block_to_v1(value) + converted_blocks.append(cast("types.ContentBlock", converted_block)) + else: + converted_blocks.append(block) + else: + converted_blocks.append(block) + + return converted_blocks + + +def _convert_legacy_v0_content_block_to_v1( + block: dict, +) -> Union[types.ContentBlock, dict]: + """Convert a LangChain v0 content block to v1 format. + + Preserves unknown keys as extras to avoid data loss. + + Returns the original block unchanged if it's not in v0 format. + + """ + + def _extract_v0_extras(block_dict: dict, known_keys: set[str]) -> dict[str, Any]: + """Extract unknown keys from v0 block to preserve as extras.""" + return {k: v for k, v in block_dict.items() if k not in known_keys} + + # Check if this is actually a v0 format block + block_type = block.get("type") + if block_type not in {"image", "audio", "file"} or "source_type" not in block: + # Not a v0 format block, return unchanged + return block + + if block.get("type") == "image": + source_type = block.get("source_type") + if source_type == "url": + known_keys = {"type", "source_type", "url", "mime_type"} + extras = _extract_v0_extras(block, known_keys) + if "id" in block: + return types.create_image_block( + url=block["url"], + mime_type=block.get("mime_type"), + id=block["id"], + **extras, + ) + + # Don't construct with an ID if not present in original block + v1_block = types.ImageContentBlock(type="image", url=block["url"]) + if block.get("mime_type"): + v1_block["mime_type"] = block["mime_type"] + + for key, value in extras.items(): + if value is not None: + v1_block["extras"] = {} + v1_block["extras"][key] = value + return v1_block + if source_type == "base64": + known_keys = {"type", "source_type", "data", "mime_type"} + extras = _extract_v0_extras(block, known_keys) + if "id" in block: + return types.create_image_block( + base64=block["data"], + mime_type=block.get("mime_type"), + id=block["id"], + **extras, + ) + + v1_block = types.ImageContentBlock(type="image", base64=block["data"]) + if block.get("mime_type"): + v1_block["mime_type"] = block["mime_type"] + + for key, value in extras.items(): + if value is not None: + v1_block["extras"] = {} + v1_block["extras"][key] = value + return v1_block + if source_type == "id": + known_keys = {"type", "source_type", "id"} + extras = _extract_v0_extras(block, known_keys) + # For id `source_type`, `id` is the file reference, not block ID + v1_block = types.ImageContentBlock(type="image", file_id=block["id"]) + + for key, value in extras.items(): + if value is not None: + v1_block["extras"] = {} + v1_block["extras"][key] = value + + return v1_block + elif block.get("type") == "audio": + source_type = block.get("source_type") + if source_type == "url": + known_keys = {"type", "source_type", "url", "mime_type"} + extras = _extract_v0_extras(block, known_keys) + return types.create_audio_block( + url=block["url"], mime_type=block.get("mime_type"), **extras + ) + if source_type == "base64": + known_keys = {"type", "source_type", "data", "mime_type"} + extras = 
_extract_v0_extras(block, known_keys) + return types.create_audio_block( + base64=block["data"], mime_type=block.get("mime_type"), **extras + ) + if source_type == "id": + known_keys = {"type", "source_type", "id"} + extras = _extract_v0_extras(block, known_keys) + return types.create_audio_block(file_id=block["id"], **extras) + elif block.get("type") == "file": + source_type = block.get("source_type") + if source_type == "url": + known_keys = {"type", "source_type", "url", "mime_type"} + extras = _extract_v0_extras(block, known_keys) + return types.create_file_block( + url=block["url"], mime_type=block.get("mime_type"), **extras + ) + if source_type == "base64": + known_keys = {"type", "source_type", "data", "mime_type"} + extras = _extract_v0_extras(block, known_keys) + return types.create_file_block( + base64=block["data"], mime_type=block.get("mime_type"), **extras + ) + if source_type == "id": + known_keys = {"type", "source_type", "id"} + extras = _extract_v0_extras(block, known_keys) + return types.create_file_block(file_id=block["id"], **extras) + if source_type == "text": + known_keys = {"type", "source_type", "url", "mime_type"} + extras = _extract_v0_extras(block, known_keys) + return types.create_plaintext_block( + # In v0, URL points to the text file content + text=block["url"], + **extras, + ) + + # If we can't convert, return the block unchanged + return block + + +def _convert_openai_format_to_data_block( + block: dict, +) -> Union[types.ContentBlock, dict[Any, Any]]: + """Convert OpenAI image/audio/file content block to respective v1 multimodal block. + + We expect that the incoming block is verified to be in OpenAI Chat Completions + format. + + If parsing fails, passes block through unchanged. + + Mappings (Chat Completions to LangChain v1): + - Image -> `ImageContentBlock` + - Audio -> `AudioContentBlock` + - File -> `FileContentBlock` + + """ + + # Extract extra keys to put them in `extras` + def _extract_extras(block_dict: dict, known_keys: set[str]) -> dict[str, Any]: + """Extract unknown keys from block to preserve as extras.""" + return {k: v for k, v in block_dict.items() if k not in known_keys} + + # base64-style image block + if (block["type"] == "image_url") and ( + parsed := _parse_data_uri(block["image_url"]["url"]) + ): + known_keys = {"type", "image_url"} + extras = _extract_extras(block, known_keys) + + # Also extract extras from nested image_url dict + image_url_known_keys = {"url"} + image_url_extras = _extract_extras(block["image_url"], image_url_known_keys) + + # Merge extras + all_extras = {**extras} + for key, value in image_url_extras.items(): + if key == "detail": # Don't rename + all_extras["detail"] = value + else: + all_extras[f"image_url_{key}"] = value + + return types.create_image_block( + # Even though this is labeled as `url`, it can be base64-encoded + base64=block["image_url"]["url"], + mime_type=parsed["mime_type"], + **all_extras, + ) + + # url-style image block + if (block["type"] == "image_url") and isinstance( + block["image_url"].get("url"), str + ): + known_keys = {"type", "image_url"} + extras = _extract_extras(block, known_keys) + + image_url_known_keys = {"url"} + image_url_extras = _extract_extras(block["image_url"], image_url_known_keys) + + all_extras = {**extras} + for key, value in image_url_extras.items(): + if key == "detail": # Don't rename + all_extras["detail"] = value + else: + all_extras[f"image_url_{key}"] = value + + return types.create_image_block( + url=block["image_url"]["url"], + **all_extras, + ) + + # 
base64-style audio block + # audio is only represented via raw data, no url or ID option + if block["type"] == "input_audio": + known_keys = {"type", "input_audio"} + extras = _extract_extras(block, known_keys) + + # Also extract extras from nested audio dict + audio_known_keys = {"data", "format"} + audio_extras = _extract_extras(block["input_audio"], audio_known_keys) + + all_extras = {**extras} + for key, value in audio_extras.items(): + all_extras[f"audio_{key}"] = value + + return types.create_audio_block( + base64=block["input_audio"]["data"], + mime_type=f"audio/{block['input_audio']['format']}", + **all_extras, + ) + + # id-style file block + if block.get("type") == "file" and "file_id" in block.get("file", {}): + known_keys = {"type", "file"} + extras = _extract_extras(block, known_keys) + + file_known_keys = {"file_id"} + file_extras = _extract_extras(block["file"], file_known_keys) + + all_extras = {**extras} + for key, value in file_extras.items(): + all_extras[f"file_{key}"] = value + + return types.create_file_block( + file_id=block["file"]["file_id"], + **all_extras, + ) + + # base64-style file block + if (block["type"] == "file") and ( + parsed := _parse_data_uri(block["file"]["file_data"]) + ): + known_keys = {"type", "file"} + extras = _extract_extras(block, known_keys) + + file_known_keys = {"file_data", "filename"} + file_extras = _extract_extras(block["file"], file_known_keys) + + all_extras = {**extras} + for key, value in file_extras.items(): + all_extras[f"file_{key}"] = value + + mime_type = parsed["mime_type"] + filename = block["file"].get("filename") + return types.create_file_block( + base64=block["file"]["file_data"], + mime_type=mime_type, + filename=filename, + **all_extras, + ) + + # Escape hatch + return block diff --git a/libs/core/langchain_core/messages/block_translators/openai.py b/libs/core/langchain_core/messages/block_translators/openai.py index 16f81e6502595..029757563b44d 100644 --- a/libs/core/langchain_core/messages/block_translators/openai.py +++ b/libs/core/langchain_core/messages/block_translators/openai.py @@ -1,10 +1,20 @@ """Derivations of standard content blocks from OpenAI content.""" +from __future__ import annotations + from collections.abc import Iterable -from typing import Any, Optional, Union, cast +from typing import TYPE_CHECKING, Any, Optional, Union, cast -from langchain_core.messages import AIMessage, AIMessageChunk +from langchain_core.language_models._utils import ( + _is_openai_data_block, +) from langchain_core.messages import content as types +from langchain_core.messages.block_translators.langchain import ( + _convert_openai_format_to_data_block, +) + +if TYPE_CHECKING: + from langchain_core.messages import AIMessage, AIMessageChunk # v1 / Chat Completions @@ -25,6 +35,55 @@ def _convert_to_v1_from_chat_completions( return content_blocks +def _convert_to_v1_from_chat_completions_input( + blocks: list[types.ContentBlock], +) -> list[types.ContentBlock]: + """Convert OpenAI Chat Completions format blocks to v1 format. + + Processes non_standard blocks that might be OpenAI format and converts them + to proper ContentBlocks. If conversion fails, leaves them as non_standard. + + Args: + blocks: List of content blocks to process. + + Returns: + Updated list with OpenAI blocks converted to v1 format. 
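+
+    A minimal sketch (the base64 payload is a placeholder; converted blocks may
+    also carry an auto-generated ``id``):
+
+    .. code-block:: python
+
+        _convert_to_v1_from_chat_completions_input(
+            [
+                {
+                    "type": "non_standard",
+                    "value": {
+                        "type": "input_audio",
+                        "input_audio": {"data": "<base64 wav>", "format": "wav"},
+                    },
+                }
+            ]
+        )
+        # -> [{"type": "audio", "base64": "<base64 wav>", "mime_type": "audio/wav", ...}]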
+ """ + from langchain_core.messages import content as types + + converted_blocks = [] + for block in blocks: + if ( + isinstance(block, dict) + and block.get("type") == "non_standard" + and "value" in block + and isinstance(block["value"], dict) # type: ignore[typeddict-item] + ): + # We know this is a NonStandardContentBlock, so we can safely access value + value = cast("Any", block)["value"] + # Check if this looks like OpenAI format + if value.get("type") in { + "image_url", + "input_audio", + "file", + } and _is_openai_data_block(value): + converted_block = _convert_openai_format_to_data_block(value) + # If conversion succeeded, use it; otherwise keep as non_standard + if ( + isinstance(converted_block, dict) + and converted_block.get("type") in types.KNOWN_BLOCK_TYPES + ): + converted_blocks.append(cast("types.ContentBlock", converted_block)) + else: + converted_blocks.append(block) + else: + converted_blocks.append(block) + else: + converted_blocks.append(block) + + return converted_blocks + + def _convert_to_v1_from_chat_completions_chunk( chunk: AIMessageChunk, ) -> list[types.ContentBlock]: @@ -220,6 +279,9 @@ def _iter_blocks() -> Iterable[types.ContentBlock]: Union[types.ToolCall, types.InvalidToolCall, types.ToolCallChunk] ] = None call_id = block.get("call_id", "") + + from langchain_core.messages import AIMessageChunk + if ( isinstance(message, AIMessageChunk) and len(message.tool_call_chunks) == 1 diff --git a/libs/core/langchain_core/messages/content.py b/libs/core/langchain_core/messages/content.py index fd46859b9181b..83287fb06c8b8 100644 --- a/libs/core/langchain_core/messages/content.py +++ b/libs/core/langchain_core/messages/content.py @@ -894,8 +894,6 @@ class NonStandardContentBlock(TypedDict): ContentBlock = Union[ TextContentBlock, - ToolCall, - ToolCallChunk, InvalidToolCall, ReasoningContentBlock, NonStandardContentBlock, @@ -905,22 +903,27 @@ class NonStandardContentBlock(TypedDict): KNOWN_BLOCK_TYPES = { + # Text output "text", - "text-plain", + "reasoning", + # Tools "tool_call", "invalid_tool_call", "tool_call_chunk", - "reasoning", - "non_standard", + # Multimodal data "image", "audio", "file", + "text-plain", "video", + # Server-side tool calls "code_interpreter_call", "code_interpreter_output", "code_interpreter_result", "web_search_call", "web_search_result", + # Catch-all + "non_standard", } @@ -950,21 +953,25 @@ def is_data_content_block(block: dict) -> bool: True if the content block is a data content block, False otherwise. """ - return block.get("type") in _get_data_content_block_types() and any( - # Check if at least one non-type key is present to signify presence of data - key in block - for key in ( - "url", - "base64", - "file_id", - "text", - "source_type", # for backwards compatibility with v0 content blocks - # TODO: should we verify that if source_type is present, at least one of - # url, base64, or file_id is also present? Otherwise, source_type could be - # present without any actual data? Need to confirm whether this was ever - # possible in v0 content blocks in the first place. 
- ) - ) + if block.get("type") not in _get_data_content_block_types(): + return False + + if any(key in block for key in ("url", "base64", "file_id", "text")): + return True + + # Verify data presence based on source type + if "source_type" in block: + source_type = block["source_type"] + if (source_type == "url" and "url" in block) or ( + source_type == "base64" and "data" in block + ): + return True + if (source_type == "id" and "id" in block) or ( + source_type == "text" and "url" in block + ): + return True + + return False def is_tool_call_block(block: ContentBlock) -> TypeGuard[ToolCall]: @@ -1021,12 +1028,13 @@ def convert_to_openai_image_block(block: dict[str, Any]) -> dict: def convert_to_openai_data_block(block: dict) -> dict: """Format standard data content block to format expected by OpenAI.""" - # TODO: make sure this supports new v1 if block["type"] == "image": formatted_block = convert_to_openai_image_block(block) elif block["type"] == "file": if "base64" in block or block.get("source_type") == "base64": + # Handle v0 format: {"source_type": "base64", "data": "...", ...} + # Handle v1 format: {"base64": "...", ...} base64_data = block["data"] if "source_type" in block else block["base64"] file = {"file_data": f"data:{block['mime_type']};base64,{base64_data}"} if filename := block.get("filename"): @@ -1045,6 +1053,8 @@ def convert_to_openai_data_block(block: dict) -> dict: ) formatted_block = {"type": "file", "file": file} elif "file_id" in block or block.get("source_type") == "id": + # Handle v0 format: {"source_type": "id", "id": "...", ...} + # Handle v1 format: {"file_id": "...", ...} file_id = block["id"] if "source_type" in block else block["file_id"] formatted_block = {"type": "file", "file": {"file_id": file_id}} else: @@ -1053,6 +1063,8 @@ def convert_to_openai_data_block(block: dict) -> dict: elif block["type"] == "audio": if "base64" in block or block.get("source_type") == "base64": + # Handle v0 format: {"source_type": "base64", "data": "...", ...} + # Handle v1 format: {"base64": "...", ...} base64_data = block["data"] if "source_type" in block else block["base64"] audio_format = block["mime_type"].split("/")[-1] formatted_block = { diff --git a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py index 4662c2fd58b7f..848fb75091a9f 100644 --- a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py +++ b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py @@ -44,6 +44,37 @@ from langchain_core.outputs.llm_result import LLMResult +def _content_blocks_equal_ignore_id( + actual: Union[str, list[Any]], expected: Union[str, list[Any]] +) -> bool: + """Compare content blocks, ignoring auto-generated `id` fields. + + Args: + actual: Actual content from response (string or list of content blocks). + expected: Expected content to compare against (string or list of blocks). + + Returns: + True if content matches (excluding `id` fields), False otherwise. 
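+
+    Example (values are illustrative):
+
+    .. code-block:: python
+
+        _content_blocks_equal_ignore_id(
+            [{"type": "text", "text": "hi", "id": "lc_abc123"}],
+            [{"type": "text", "text": "hi"}],
+        )  # True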
+ + """ + if isinstance(actual, str) or isinstance(expected, str): + return actual == expected + + if len(actual) != len(expected): + return False + for actual_block, expected_block in zip(actual, expected): + actual_without_id = ( + {k: v for k, v in actual_block.items() if k != "id"} + if isinstance(actual_block, dict) and "id" in actual_block + else actual_block + ) + + if actual_without_id != expected_block: + return False + + return True + + @pytest.fixture def messages() -> list: return [ @@ -145,7 +176,7 @@ def eval_response(callback: BaseFakeCallbackHandler, i: int) -> None: async def test_astream_fallback_to_ainvoke() -> None: - """Test astream uses appropriate implementation.""" + """Test `astream()` uses appropriate implementation.""" class ModelWithGenerate(BaseChatModel): @override @@ -431,11 +462,12 @@ def on_chat_model_start(self, *args: Any, **kwargs: Any) -> Run: def test_trace_images_in_openai_format() -> None: - """Test that images are traced in OpenAI format.""" + """Test that images are traced in OpenAI Chat Completions format.""" llm = ParrotFakeChatModel() messages = [ { "role": "user", + # v0 format "content": [ { "type": "image", @@ -446,7 +478,7 @@ def test_trace_images_in_openai_format() -> None: } ] tracer = FakeChatModelStartTracer() - response = llm.invoke(messages, config={"callbacks": [tracer]}) + llm.invoke(messages, config={"callbacks": [tracer]}) assert tracer.messages == [ [ [ @@ -461,19 +493,51 @@ def test_trace_images_in_openai_format() -> None: ] ] ] - # Test no mutation - assert response.content == [ - { + + +def test_content_block_transformation_v0_to_v1_image() -> None: + """Test that v0 format image content blocks are transformed to v1 format.""" + # Create a message with v0 format image content + image_message = AIMessage( + content=[ + { + "type": "image", + "source_type": "url", + "url": "https://example.com/image.png", + } + ] + ) + + llm = GenericFakeChatModel(messages=iter([image_message]), output_version="v1") + response = llm.invoke("test") + + # With v1 output_version, .content should be transformed + # Check structure, ignoring auto-generated IDs + assert len(response.content) == 1 + content_block = response.content[0] + if isinstance(content_block, dict) and "id" in content_block: + # Remove auto-generated id for comparison + content_without_id = {k: v for k, v in content_block.items() if k != "id"} + expected_content = { + "type": "image", + "url": "https://example.com/image.png", + } + assert content_without_id == expected_content + else: + assert content_block == { "type": "image", - "source_type": "url", "url": "https://example.com/image.png", } - ] -def test_trace_content_blocks_with_no_type_key() -> None: - """Test that we add a ``type`` key to certain content blocks that don't have one.""" - llm = ParrotFakeChatModel() +@pytest.mark.parametrize("output_version", ["v0", "v1"]) +def test_trace_content_blocks_with_no_type_key(output_version: str) -> None: + """Test behavior of content blocks that don't have a `type` key. + + Only for blocks with one key, in which case, the name of the key is used as `type`. 
+ + """ + llm = ParrotFakeChatModel(output_version=output_version) messages = [ { "role": "user", @@ -508,156 +572,235 @@ def test_trace_content_blocks_with_no_type_key() -> None: ] ] ] - # Test no mutation - assert response.content == [ + + if output_version == "v0": + assert response.content == [ + { + "type": "text", + "text": "Hello", + }, + { + "cachePoint": {"type": "default"}, + }, + ] + else: + assert response.content == [ + { + "type": "text", + "text": "Hello", + }, + { + "type": "non_standard", + "value": { + "cachePoint": {"type": "default"}, + }, + }, + ] + + assert response.content_blocks == [ { "type": "text", "text": "Hello", }, { - "cachePoint": {"type": "default"}, + "type": "non_standard", + "value": { + "cachePoint": {"type": "default"}, + }, }, ] def test_extend_support_to_openai_multimodal_formats() -> None: - """Test that chat models normalize OpenAI file and audio inputs.""" - llm = ParrotFakeChatModel() - messages = [ - { - "role": "user", - "content": [ - {"type": "text", "text": "Hello"}, - { - "type": "image_url", - "image_url": {"url": "https://example.com/image.png"}, + """Test normalizing OpenAI audio, image, and file inputs to v1.""" + # Audio and file only (chat model default) + messages = HumanMessage( + content=[ + {"type": "text", "text": "Hello"}, + { # audio-base64 + "type": "input_audio", + "input_audio": { + "format": "wav", + "data": "data:audio/wav;base64,", }, - { - "type": "image_url", - "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQSkZJRg..."}, + }, + { # file-base64 + "type": "file", + "file": { + "filename": "draconomicon.pdf", + "file_data": "data:application/pdf;base64,", }, + }, + { # file-id + "type": "file", + "file": {"file_id": ""}, + }, + ] + ) + + expected_content_messages = HumanMessage( + content=[ + {"type": "text", "text": "Hello"}, # TextContentBlock + { # AudioContentBlock + "type": "audio", + "base64": "data:audio/wav;base64,", + "mime_type": "audio/wav", + }, + { # FileContentBlock + "type": "file", + "base64": "data:application/pdf;base64,", + "mime_type": "application/pdf", + "extras": {"filename": "draconomicon.pdf"}, + }, + { # ... + "type": "file", + "file_id": "", + }, + ] + ) + + normalized_content = _normalize_messages([messages]) + + # Check structure, ignoring auto-generated IDs + assert len(normalized_content) == 1 + normalized_message = normalized_content[0] + assert len(normalized_message.content) == len(expected_content_messages.content) + + assert _content_blocks_equal_ignore_id( + normalized_message.content, expected_content_messages.content + ) + + messages = HumanMessage( + content=[ + {"type": "text", "text": "Hello"}, + { # image-url + "type": "image_url", + "image_url": {"url": "https://example.com/image.png"}, + }, + { # image-base64 + "type": "image_url", + "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQSkZJRg..."}, + }, + { # audio-base64 + "type": "input_audio", + "input_audio": { + "format": "wav", + "data": "data:audio/wav;base64,", + }, + }, + { # file-base64 + "type": "file", + "file": { + "filename": "draconomicon.pdf", + "file_data": "data:application/pdf;base64,", + }, + }, + { # file-id + "type": "file", + "file": {"file_id": ""}, + }, + ] + ) + + expected_content_messages = HumanMessage( + content=[ + {"type": "text", "text": "Hello"}, # TextContentBlock + { # Chat Completions Image becomes ImageContentBlock after invoke + "type": "image", + "url": "https://example.com/image.png", + }, + { # ... 
+ "type": "image", + "base64": "data:image/jpeg;base64,/9j/4AAQSkZJRg...", + "mime_type": "image/jpeg", + }, + { # AudioContentBlock + "type": "audio", + "base64": "data:audio/wav;base64,", + "mime_type": "audio/wav", + }, + { # FileContentBlock + "type": "file", + "base64": "data:application/pdf;base64,", + "mime_type": "application/pdf", + "extras": {"filename": "draconomicon.pdf"}, + }, + { # ... + "type": "file", + "file_id": "", + }, + ] + ) + + +def test_normalize_messages_edge_cases() -> None: + # Test behavior of malformed/unrecognized content blocks + + messages = [ + HumanMessage( + content=[ { - "type": "file", - "file": { - "filename": "draconomicon.pdf", - "file_data": "data:application/pdf;base64,", - }, + "type": "input_image", # Responses API type; not handled + "image_url": "uri", }, { - "type": "file", - "file": { - "file_data": "data:application/pdf;base64,", - }, + # Standard OpenAI Chat Completions type but malformed structure + "type": "input_audio", + "input_audio": "uri", # Should be nested in `audio` }, { "type": "file", - "file": {"file_id": ""}, + "file": "uri", # `file` should be a dict for Chat Completions }, { - "type": "input_audio", - "input_audio": {"data": "", "format": "wav"}, + "type": "input_file", # Responses API type; not handled + "file_data": "uri", + "filename": "file-name", }, - ], - }, - ] - expected_content = [ - {"type": "text", "text": "Hello"}, - { - "type": "image_url", - "image_url": {"url": "https://example.com/image.png"}, - }, - { - "type": "image_url", - "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQSkZJRg..."}, - }, - { - "type": "file", - "source_type": "base64", - "data": "", - "mime_type": "application/pdf", - "filename": "draconomicon.pdf", - }, - { - "type": "file", - "source_type": "base64", - "data": "", - "mime_type": "application/pdf", - }, - { - "type": "file", - "file": {"file_id": ""}, - }, - { - "type": "audio", - "source_type": "base64", - "data": "", - "mime_type": "audio/wav", - }, + ] + ) ] - response = llm.invoke(messages) - assert response.content == expected_content - # Test no mutation - assert messages[0]["content"] == [ - {"type": "text", "text": "Hello"}, - { - "type": "image_url", - "image_url": {"url": "https://example.com/image.png"}, - }, - { - "type": "image_url", - "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQSkZJRg..."}, - }, - { - "type": "file", - "file": { - "filename": "draconomicon.pdf", - "file_data": "data:application/pdf;base64,", - }, - }, - { - "type": "file", - "file": { - "file_data": "data:application/pdf;base64,", - }, - }, - { - "type": "file", - "file": {"file_id": ""}, - }, - { - "type": "input_audio", - "input_audio": {"data": "", "format": "wav"}, - }, - ] + assert messages == _normalize_messages(messages) -def test_normalize_messages_edge_cases() -> None: - # Test some blocks that should pass through - messages = [ +def test_normalize_messages_v1_content_blocks_unchanged() -> None: + """Test passing v1 content blocks to `_normalize_messages()` leaves unchanged.""" + input_messages = [ HumanMessage( content=[ { - "type": "file", - "file": "uri", + "type": "text", + "text": "Hello world", }, { - "type": "input_file", - "file_data": "uri", - "filename": "file-name", + "type": "image", + "url": "https://example.com/image.png", + "mime_type": "image/png", }, { - "type": "input_audio", - "input_audio": "uri", + "type": "audio", + "base64": "base64encodedaudiodata", + "mime_type": "audio/wav", }, { - "type": "input_image", - "image_url": "uri", + "type": "file", + "id": 
"file_123", + }, + { + "type": "reasoning", + "reasoning": "Let me think about this...", }, ] ) ] - assert messages == _normalize_messages(messages) + + result = _normalize_messages(input_messages) + + # Verify the result is identical to the input (message should not be copied) + assert len(result) == 1 + assert result[0] is input_messages[0] + assert result[0].content == input_messages[0].content def test_output_version_invoke(monkeypatch: Any) -> None: @@ -679,18 +822,24 @@ def test_output_version_invoke(monkeypatch: Any) -> None: assert response.response_metadata["output_version"] == "v1" +# -- v1 output version tests -- + + async def test_output_version_ainvoke(monkeypatch: Any) -> None: messages = [AIMessage("hello")] + # v0 + llm = GenericFakeChatModel(messages=iter(messages)) + response = await llm.ainvoke("hello") + assert response.content == "hello" + + # v1 llm = GenericFakeChatModel(messages=iter(messages), output_version="v1") response = await llm.ainvoke("hello") assert response.content == [{"type": "text", "text": "hello"}] assert response.response_metadata["output_version"] == "v1" - llm = GenericFakeChatModel(messages=iter(messages)) - response = await llm.ainvoke("hello") - assert response.content == "hello" - + # v1 from env var monkeypatch.setenv("LC_OUTPUT_VERSION", "v1") llm = GenericFakeChatModel(messages=iter(messages)) response = await llm.ainvoke("hello") @@ -701,20 +850,7 @@ async def test_output_version_ainvoke(monkeypatch: Any) -> None: def test_output_version_stream(monkeypatch: Any) -> None: messages = [AIMessage("foo bar")] - llm = GenericFakeChatModel(messages=iter(messages), output_version="v1") - full: Optional[BaseMessageChunk] = None - for chunk in llm.stream("hello"): - assert isinstance(chunk, AIMessageChunk) - assert isinstance(chunk.content, list) - assert len(chunk.content) == 1 - block = chunk.content[0] - assert isinstance(block, dict) - assert block["type"] == "text" - assert block["text"] - full = chunk if full is None else full + chunk - assert isinstance(full, AIMessageChunk) - assert full.response_metadata["output_version"] == "v1" - + # v0 llm = GenericFakeChatModel(messages=iter(messages)) full = None for chunk in llm.stream("hello"): @@ -725,9 +861,9 @@ def test_output_version_stream(monkeypatch: Any) -> None: assert isinstance(full, AIMessageChunk) assert full.content == "foo bar" - monkeypatch.setenv("LC_OUTPUT_VERSION", "v1") - llm = GenericFakeChatModel(messages=iter(messages)) - full = None + # v1 + llm = GenericFakeChatModel(messages=iter(messages), output_version="v1") + full_v1: Optional[BaseMessageChunk] = None for chunk in llm.stream("hello"): assert isinstance(chunk, AIMessageChunk) assert isinstance(chunk.content, list) @@ -736,17 +872,15 @@ def test_output_version_stream(monkeypatch: Any) -> None: assert isinstance(block, dict) assert block["type"] == "text" assert block["text"] - full = chunk if full is None else full + chunk - assert isinstance(full, AIMessageChunk) - assert full.response_metadata["output_version"] == "v1" - - -async def test_output_version_astream(monkeypatch: Any) -> None: - messages = [AIMessage("foo bar")] + full_v1 = chunk if full_v1 is None else full_v1 + chunk + assert isinstance(full_v1, AIMessageChunk) + assert full_v1.response_metadata["output_version"] == "v1" - llm = GenericFakeChatModel(messages=iter(messages), output_version="v1") - full: Optional[BaseMessageChunk] = None - async for chunk in llm.astream("hello"): + # v1 from env var + monkeypatch.setenv("LC_OUTPUT_VERSION", "v1") + llm = 
GenericFakeChatModel(messages=iter(messages)) + full_env = None + for chunk in llm.stream("hello"): assert isinstance(chunk, AIMessageChunk) assert isinstance(chunk.content, list) assert len(chunk.content) == 1 @@ -754,10 +888,15 @@ async def test_output_version_astream(monkeypatch: Any) -> None: assert isinstance(block, dict) assert block["type"] == "text" assert block["text"] - full = chunk if full is None else full + chunk - assert isinstance(full, AIMessageChunk) - assert full.response_metadata["output_version"] == "v1" + full_env = chunk if full_env is None else full_env + chunk + assert isinstance(full_env, AIMessageChunk) + assert full_env.response_metadata["output_version"] == "v1" + +async def test_output_version_astream(monkeypatch: Any) -> None: + messages = [AIMessage("foo bar")] + + # v0 llm = GenericFakeChatModel(messages=iter(messages)) full = None async for chunk in llm.astream("hello"): @@ -768,9 +907,25 @@ async def test_output_version_astream(monkeypatch: Any) -> None: assert isinstance(full, AIMessageChunk) assert full.content == "foo bar" + # v1 + llm = GenericFakeChatModel(messages=iter(messages), output_version="v1") + full_v1: Optional[BaseMessageChunk] = None + async for chunk in llm.astream("hello"): + assert isinstance(chunk, AIMessageChunk) + assert isinstance(chunk.content, list) + assert len(chunk.content) == 1 + block = chunk.content[0] + assert isinstance(block, dict) + assert block["type"] == "text" + assert block["text"] + full_v1 = chunk if full_v1 is None else full_v1 + chunk + assert isinstance(full_v1, AIMessageChunk) + assert full_v1.response_metadata["output_version"] == "v1" + + # v1 from env var monkeypatch.setenv("LC_OUTPUT_VERSION", "v1") llm = GenericFakeChatModel(messages=iter(messages)) - full = None + full_env = None async for chunk in llm.astream("hello"): assert isinstance(chunk, AIMessageChunk) assert isinstance(chunk.content, list) @@ -779,6 +934,6 @@ async def test_output_version_astream(monkeypatch: Any) -> None: assert isinstance(block, dict) assert block["type"] == "text" assert block["text"] - full = chunk if full is None else full + chunk - assert isinstance(full, AIMessageChunk) - assert full.response_metadata["output_version"] == "v1" + full_env = chunk if full_env is None else full_env + chunk + assert isinstance(full_env, AIMessageChunk) + assert full_env.response_metadata["output_version"] == "v1" diff --git a/libs/core/tests/unit_tests/language_models/chat_models/test_rate_limiting.py b/libs/core/tests/unit_tests/language_models/chat_models/test_rate_limiting.py index c4d6a50f6bedb..0411915c26925 100644 --- a/libs/core/tests/unit_tests/language_models/chat_models/test_rate_limiting.py +++ b/libs/core/tests/unit_tests/language_models/chat_models/test_rate_limiting.py @@ -214,8 +214,8 @@ def test_rate_limit_skips_cache() -> None: assert list(cache._cache) == [ ( '[{"lc": 1, "type": "constructor", "id": ["langchain", "schema", ' - '"messages", ' - '"HumanMessage"], "kwargs": {"content": "foo", "type": "human"}}]', + '"messages", "HumanMessage"], "kwargs": {"content": "foo", ' + '"type": "human"}}]', "[('_type', 'generic-fake-chat-model'), ('stop', None)]", ) ] @@ -241,7 +241,8 @@ def test_serialization_with_rate_limiter() -> None: assert InMemoryRateLimiter.__name__ not in serialized_model -async def test_rate_limit_skips_cache_async() -> None: +@pytest.mark.parametrize("output_version", ["v0", "v1"]) +async def test_rate_limit_skips_cache_async(output_version: str) -> None: """Test that rate limiting does not rate limit cache 
look ups.""" cache = InMemoryCache() model = GenericFakeChatModel( @@ -250,6 +251,7 @@ async def test_rate_limit_skips_cache_async() -> None: requests_per_second=20, check_every_n_seconds=0.1, max_bucket_size=1 ), cache=cache, + output_version=output_version, ) tic = time.time() diff --git a/libs/core/tests/unit_tests/messages/block_translators/test_openai.py b/libs/core/tests/unit_tests/messages/block_translators/test_openai.py index 27f126a314fdd..3602d9eb08d03 100644 --- a/libs/core/tests/unit_tests/messages/block_translators/test_openai.py +++ b/libs/core/tests/unit_tests/messages/block_translators/test_openai.py @@ -1,7 +1,39 @@ from typing import Optional +from langchain_core.language_models.fake_chat_models import ParrotFakeChatModel from langchain_core.messages import AIMessage, AIMessageChunk from langchain_core.messages import content as types +from tests.unit_tests.language_models.chat_models.test_base import ( + _content_blocks_equal_ignore_id, +) + + +def test_v0_to_v1_content_blocks() -> None: + llm = ParrotFakeChatModel() + messages = [ + { + "role": "user", + # v0 format + "content": [ + { + "type": "image", + "source_type": "url", + "url": "https://example.com/image.png", + } + ], + } + ] + response = llm.invoke(messages) + assert len(response.content_blocks) == 1 + expected_content_blocks = [ + { + "type": "image", + "url": "https://example.com/image.png", + } + ] + assert _content_blocks_equal_ignore_id( + response.content_blocks, expected_content_blocks + ) def test_convert_to_v1_from_responses() -> None: From 26833f2ebc66f1d4e3e5748762bdbed37fe40468 Mon Sep 17 00:00:00 2001 From: ccurme Date: Fri, 22 Aug 2025 17:06:53 -0300 Subject: [PATCH 44/73] feat(anthropic): v1 support (#32623) --- .../langchain_core/language_models/_utils.py | 2 +- .../language_models/chat_models.py | 2 +- libs/core/langchain_core/messages/ai.py | 10 +- libs/core/langchain_core/messages/base.py | 43 +- .../messages/block_translators/__init__.py | 76 +-- .../block_translators/amazon/__init__.py | 1 - .../messages/block_translators/anthropic.py | 414 ++++++++++++++++- .../block_translators/{amazon => }/bedrock.py | 26 +- .../{amazon => }/bedrock_converse.py | 28 +- .../messages/block_translators/chroma.py | 27 -- .../block_translators/google/__init__.py | 1 - .../{google/genai.py => google_genai.py} | 22 +- .../vertexai.py => google_vertexai.py} | 24 +- .../messages/block_translators/groq.py | 22 +- .../{langchain.py => langchain_v0.py} | 43 +- .../messages/block_translators/ollama.py | 22 +- .../messages/block_translators/openai.py | 51 +- libs/core/langchain_core/messages/content.py | 8 - libs/core/langchain_core/utils/_merge.py | 34 +- .../language_models/chat_models/test_base.py | 8 +- .../block_translators/test_anthropic.py | 439 ++++++++++++++++++ .../block_translators/test_langchain_v0.py | 79 ++++ .../messages/block_translators/test_openai.py | 92 ++-- .../block_translators/test_registration.py | 29 ++ .../core/tests/unit_tests/messages/test_ai.py | 93 ++++ .../anthropic/langchain_anthropic/_compat.py | 245 ++++++++++ .../langchain_anthropic/chat_models.py | 95 +++- .../tests/cassettes/test_agent_loop.yaml.gz | Bin 0 -> 2028 bytes .../test_agent_loop_streaming.yaml.gz | Bin 0 -> 3179 bytes .../tests/cassettes/test_citations.yaml.gz | Bin 0 -> 3388 bytes .../integration_tests/test_chat_models.py | 272 +++++++++-- .../__snapshots__/test_standard.ambr | 1 + .../tests/unit_tests/test_chat_models.py | 181 +++++++- 33 files changed, 2125 insertions(+), 265 deletions(-) delete mode 100644 
libs/core/langchain_core/messages/block_translators/amazon/__init__.py
 rename libs/core/langchain_core/messages/block_translators/{amazon => }/bedrock.py (55%)
 rename libs/core/langchain_core/messages/block_translators/{amazon => }/bedrock_converse.py (54%)
 delete mode 100644 libs/core/langchain_core/messages/block_translators/chroma.py
 delete mode 100644 libs/core/langchain_core/messages/block_translators/google/__init__.py
 rename libs/core/langchain_core/messages/block_translators/{google/genai.py => google_genai.py} (60%)
 rename libs/core/langchain_core/messages/block_translators/{google/vertexai.py => google_vertexai.py} (59%)
 rename libs/core/langchain_core/messages/block_translators/{langchain.py => langchain_v0.py} (89%)
 create mode 100644 libs/core/tests/unit_tests/messages/block_translators/test_anthropic.py
 create mode 100644 libs/core/tests/unit_tests/messages/block_translators/test_langchain_v0.py
 create mode 100644 libs/core/tests/unit_tests/messages/block_translators/test_registration.py
 create mode 100644 libs/partners/anthropic/langchain_anthropic/_compat.py
 create mode 100644 libs/partners/anthropic/tests/cassettes/test_agent_loop.yaml.gz
 create mode 100644 libs/partners/anthropic/tests/cassettes/test_agent_loop_streaming.yaml.gz
 create mode 100644 libs/partners/anthropic/tests/cassettes/test_citations.yaml.gz

diff --git a/libs/core/langchain_core/language_models/_utils.py b/libs/core/langchain_core/language_models/_utils.py
index 94680674e3a64..cb80fedb3dd2b 100644
--- a/libs/core/langchain_core/language_models/_utils.py
+++ b/libs/core/langchain_core/language_models/_utils.py
@@ -212,7 +212,7 @@ def _normalize_messages(
         }
     """
-    from langchain_core.messages.block_translators.langchain import (
+    from langchain_core.messages.block_translators.langchain_v0 import (
         _convert_legacy_v0_content_block_to_v1,
         _convert_openai_format_to_data_block,
     )
diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py
index cfd648c2d0c3e..33331e512eb34 100644
--- a/libs/core/langchain_core/language_models/chat_models.py
+++ b/libs/core/langchain_core/language_models/chat_models.py
@@ -124,7 +124,7 @@ def _format_for_tracing(messages: list[BaseMessage]) -> list[BaseMessage]:
                 if (
                     block.get("type") == "image"
                     and is_data_content_block(block)
-                    and block.get("source_type") != "id"
+                    and not ("file_id" in block or block.get("source_type") == "id")
                 ):
                     if message_to_trace is message:
                         # Shallow copy
diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py
index 8fd48c5027eff..31be4dbca4e0a 100644
--- a/libs/core/langchain_core/messages/ai.py
+++ b/libs/core/langchain_core/messages/ai.py
@@ -231,7 +231,10 @@ def content_blocks(self) -> list[types.ContentBlock]:
             translator = get_translator(model_provider)
             if translator:
-                return translator["translate_content"](self)
+                try:
+                    return translator["translate_content"](self)
+                except NotImplementedError:
+                    pass

         # Otherwise, use best-effort parsing
         blocks = super().content_blocks
@@ -380,7 +383,10 @@ def content_blocks(self) -> list[types.ContentBlock]:
             translator = get_translator(model_provider)
             if translator:
-                return translator["translate_content_chunk"](self)
+                try:
+                    return translator["translate_content_chunk"](self)
+                except NotImplementedError:
+                    pass

         # Otherwise, use best-effort parsing
         blocks = super().content_blocks
diff --git a/libs/core/langchain_core/messages/base.py b/libs/core/langchain_core/messages/base.py
index
3452740b46ef2..89008c8c42998 100644 --- a/libs/core/langchain_core/messages/base.py +++ b/libs/core/langchain_core/messages/base.py @@ -7,13 +7,7 @@ from pydantic import ConfigDict, Field from langchain_core.load.serializable import Serializable -from langchain_core.messages.block_translators.langchain import ( - _convert_legacy_v0_content_block_to_v1, - _convert_v0_multimodal_input_to_v1, -) -from langchain_core.messages.block_translators.openai import ( - _convert_to_v1_from_chat_completions_input, -) +from langchain_core.messages import content as types from langchain_core.utils import get_bolded_text from langchain_core.utils._merge import merge_dicts, merge_lists from langchain_core.utils.interactive_env import is_interactive_env @@ -21,7 +15,6 @@ if TYPE_CHECKING: from collections.abc import Sequence - from langchain_core.messages import content as types from langchain_core.prompts.chat import ChatPromptTemplate @@ -129,6 +122,15 @@ def content_blocks(self) -> list[types.ContentBlock]: """ from langchain_core.messages import content as types + from langchain_core.messages.block_translators.anthropic import ( + _convert_to_v1_from_anthropic_input, + ) + from langchain_core.messages.block_translators.langchain_v0 import ( + _convert_v0_multimodal_input_to_v1, + ) + from langchain_core.messages.block_translators.openai import ( + _convert_to_v1_from_chat_completions_input, + ) blocks: list[types.ContentBlock] = [] @@ -143,26 +145,19 @@ def content_blocks(self) -> list[types.ContentBlock]: blocks.append({"type": "text", "text": item}) elif isinstance(item, dict): item_type = item.get("type") - # Try to convert potential v0 format first - converted_block = _convert_legacy_v0_content_block_to_v1(item) - if converted_block is not item: # Conversion happened - blocks.append(cast("types.ContentBlock", converted_block)) - elif item_type is None or item_type not in types.KNOWN_BLOCK_TYPES: - blocks.append( - cast( - "types.ContentBlock", - {"type": "non_standard", "value": item}, - ) - ) + if item_type not in types.KNOWN_BLOCK_TYPES: + blocks.append({"type": "non_standard", "value": item}) else: blocks.append(cast("types.ContentBlock", item)) # Subsequent passes: attempt to unpack non-standard blocks - blocks = _convert_v0_multimodal_input_to_v1(blocks) - # blocks = _convert_to_v1_from_anthropic_input(blocks) - # ... - - return _convert_to_v1_from_chat_completions_input(blocks) + for parsing_step in [ + _convert_v0_multimodal_input_to_v1, + _convert_to_v1_from_chat_completions_input, + _convert_to_v1_from_anthropic_input, + ]: + blocks = parsing_step(blocks) + return blocks def text(self) -> str: """Get the text content of the message. 
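
The registry consumed via get_translator in ai.py above is populated through register_translator, defined in block_translators/__init__.py below. As a minimal sketch of how an out-of-tree integration package could plug into that same registry, assuming a hypothetical provider name "my_provider" and using only the register_translator / translate_content / translate_content_chunk API added in this patch:

    # Illustrative sketch only -- "my_provider" is a hypothetical provider name.
    from langchain_core.messages import AIMessage, AIMessageChunk
    from langchain_core.messages import content as types
    from langchain_core.messages.block_translators import register_translator


    def translate_content(message: AIMessage) -> list[types.ContentBlock]:
        """Derive standard content blocks from a my_provider message."""
        if isinstance(message.content, str):
            return [{"type": "text", "text": message.content}]
        # A real translator maps provider-native blocks to standard ones;
        # this sketch simply wraps each dict block as non-standard.
        return [
            {"type": "non_standard", "value": block}
            for block in message.content
            if isinstance(block, dict)
        ]


    def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]:
        """Derive standard content blocks from a my_provider message chunk."""
        return translate_content(message)


    # Run at import time of the integration package so that messages with
    # response_metadata["model_provider"] == "my_provider" resolve this translator.
    register_translator("my_provider", translate_content, translate_content_chunk)
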
diff --git a/libs/core/langchain_core/messages/block_translators/__init__.py b/libs/core/langchain_core/messages/block_translators/__init__.py index ff58558713d13..bb9673a7c373b 100644 --- a/libs/core/langchain_core/messages/block_translators/__init__.py +++ b/libs/core/langchain_core/messages/block_translators/__init__.py @@ -45,37 +45,45 @@ def get_translator( return PROVIDER_TRANSLATORS.get(provider) -def _auto_register_translators() -> None: - """Automatically register all available block translators.""" - import contextlib - import importlib - import pkgutil - from pathlib import Path - - package_path = Path(__file__).parent - - # Discover all sub-modules - for module_info in pkgutil.iter_modules([str(package_path)]): - module_name = module_info.name - - # Skip the __init__ module and any private modules - if module_name.startswith("_"): - continue - - if module_info.ispkg: - # For subpackages, discover their submodules - subpackage_path = package_path / module_name - for submodule_info in pkgutil.iter_modules([str(subpackage_path)]): - submodule_name = submodule_info.name - if not submodule_name.startswith("_"): - with contextlib.suppress(ImportError, AttributeError): - importlib.import_module( - f".{module_name}.{submodule_name}", package=__name__ - ) - else: - # Import top-level translator modules - with contextlib.suppress(ImportError, AttributeError): - importlib.import_module(f".{module_name}", package=__name__) - - -_auto_register_translators() +def _register_translators() -> None: + """Register all translators in langchain-core. + + A unit test ensures all modules in ``block_translators`` are represented here. + + For translators implemented outside langchain-core, they can be registered by + calling ``register_translator`` from within the integration package. 
+    """
+    from langchain_core.messages.block_translators.anthropic import (
+        _register_anthropic_translator,
+    )
+    from langchain_core.messages.block_translators.bedrock import (
+        _register_bedrock_translator,
+    )
+    from langchain_core.messages.block_translators.bedrock_converse import (
+        _register_bedrock_converse_translator,
+    )
+    from langchain_core.messages.block_translators.google_genai import (
+        _register_google_genai_translator,
+    )
+    from langchain_core.messages.block_translators.google_vertexai import (
+        _register_google_vertexai_translator,
+    )
+    from langchain_core.messages.block_translators.groq import _register_groq_translator
+    from langchain_core.messages.block_translators.ollama import (
+        _register_ollama_translator,
+    )
+    from langchain_core.messages.block_translators.openai import (
+        _register_openai_translator,
+    )
+
+    _register_bedrock_translator()
+    _register_bedrock_converse_translator()
+    _register_anthropic_translator()
+    _register_google_genai_translator()
+    _register_google_vertexai_translator()
+    _register_groq_translator()
+    _register_ollama_translator()
+    _register_openai_translator()
+
+
+_register_translators()
diff --git a/libs/core/langchain_core/messages/block_translators/amazon/__init__.py b/libs/core/langchain_core/messages/block_translators/amazon/__init__.py
deleted file mode 100644
index 1fbfad4912db7..0000000000000
--- a/libs/core/langchain_core/messages/block_translators/amazon/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-"""Derivations of standard content blocks from Amazon content."""
diff --git a/libs/core/langchain_core/messages/block_translators/anthropic.py b/libs/core/langchain_core/messages/block_translators/anthropic.py
index 469b3812a570e..8f0b3919fa452 100644
--- a/libs/core/langchain_core/messages/block_translators/anthropic.py
+++ b/libs/core/langchain_core/messages/block_translators/anthropic.py
@@ -1,17 +1,423 @@
 """Derivations of standard content blocks from Anthropic content."""

+import json
+from collections.abc import Iterable
+from typing import Any, cast
+
 from langchain_core.messages import AIMessage, AIMessageChunk
 from langchain_core.messages import content as types


+def _populate_extras(
+    standard_block: types.ContentBlock, block: dict[str, Any], known_fields: set[str]
+) -> types.ContentBlock:
+    """Mutate a block, populating extras."""
+    if standard_block.get("type") == "non_standard":
+        return standard_block
+
+    for key, value in block.items():
+        if key not in known_fields:
+            if "extras" not in standard_block:
+                # Below type-ignores are because mypy thinks a non-standard block can
+                # get here, although we exclude them above.
+ standard_block["extras"] = {} # type: ignore[typeddict-unknown-key] + standard_block["extras"][key] = value # type: ignore[typeddict-item] + + return standard_block + + +def _convert_to_v1_from_anthropic_input( + content: list[types.ContentBlock], +) -> list[types.ContentBlock]: + """Attempt to unpack non-standard blocks.""" + + def _iter_blocks() -> Iterable[types.ContentBlock]: + blocks: list[dict[str, Any]] = [ + cast("dict[str, Any]", block) + if block.get("type") != "non_standard" + else block["value"] # type: ignore[typeddict-item] # this is only non-standard blocks + for block in content + ] + for block in blocks: + block_type = block.get("type") + + if ( + block_type == "document" + and "source" in block + and "type" in block["source"] + ): + if block["source"]["type"] == "base64": + file_block: types.FileContentBlock = { + "type": "file", + "base64": block["source"]["data"], + "mime_type": block["source"]["media_type"], + } + _populate_extras(file_block, block, {"type", "source"}) + yield file_block + + elif block["source"]["type"] == "url": + file_block = { + "type": "file", + "url": block["source"]["url"], + } + _populate_extras(file_block, block, {"type", "source"}) + yield file_block + + elif block["source"]["type"] == "file": + file_block = { + "type": "file", + "id": block["source"]["file_id"], + } + _populate_extras(file_block, block, {"type", "source"}) + yield file_block + + elif block["source"]["type"] == "text": + plain_text_block: types.PlainTextContentBlock = { + "type": "text-plain", + "text": block["source"]["data"], + "mime_type": block.get("media_type", "text/plain"), + } + _populate_extras(plain_text_block, block, {"type", "source"}) + yield plain_text_block + + else: + yield {"type": "non_standard", "value": block} + + elif ( + block_type == "image" + and "source" in block + and "type" in block["source"] + ): + if block["source"]["type"] == "base64": + image_block: types.ImageContentBlock = { + "type": "image", + "base64": block["source"]["data"], + "mime_type": block["source"]["media_type"], + } + _populate_extras(image_block, block, {"type", "source"}) + yield image_block + + elif block["source"]["type"] == "url": + image_block = { + "type": "image", + "url": block["source"]["url"], + } + _populate_extras(image_block, block, {"type", "source"}) + yield image_block + + elif block["source"]["type"] == "file": + image_block = { + "type": "image", + "id": block["source"]["file_id"], + } + _populate_extras(image_block, block, {"type", "source"}) + yield image_block + + else: + yield {"type": "non_standard", "value": block} + + elif block_type in types.KNOWN_BLOCK_TYPES: + yield cast("types.ContentBlock", block) + + else: + yield {"type": "non_standard", "value": block} + + return list(_iter_blocks()) + + +def _convert_citation_to_v1(citation: dict[str, Any]) -> types.Annotation: + citation_type = citation.get("type") + + if citation_type == "web_search_result_location": + url_citation: types.Citation = { + "type": "citation", + "cited_text": citation["cited_text"], + "url": citation["url"], + } + if title := citation.get("title"): + url_citation["title"] = title + known_fields = {"type", "cited_text", "url", "title", "index", "extras"} + for key, value in citation.items(): + if key not in known_fields: + if "extras" not in url_citation: + url_citation["extras"] = {} + url_citation["extras"][key] = value + + return url_citation + + if citation_type in ( + "char_location", + "content_block_location", + "page_location", + "search_result_location", + ): + 
document_citation: types.Citation = { + "type": "citation", + "cited_text": citation["cited_text"], + } + if "document_title" in citation: + document_citation["title"] = citation["document_title"] + elif title := citation.get("title"): + document_citation["title"] = title + else: + pass + known_fields = { + "type", + "cited_text", + "document_title", + "title", + "index", + "extras", + } + for key, value in citation.items(): + if key not in known_fields: + if "extras" not in document_citation: + document_citation["extras"] = {} + document_citation["extras"][key] = value + + return document_citation + + return { + "type": "non_standard_annotation", + "value": citation, + } + + +def _convert_to_v1_from_anthropic(message: AIMessage) -> list[types.ContentBlock]: + """Convert Anthropic message content to v1 format.""" + if isinstance(message.content, str): + message.content = [{"type": "text", "text": message.content}] + + def _iter_blocks() -> Iterable[types.ContentBlock]: + for block in message.content: + if not isinstance(block, dict): + continue + block_type = block.get("type") + + if block_type == "text": + if citations := block.get("citations"): + text_block: types.TextContentBlock = { + "type": "text", + "text": block.get("text", ""), + "annotations": [_convert_citation_to_v1(a) for a in citations], + } + else: + text_block = {"type": "text", "text": block["text"]} + if "index" in block: + text_block["index"] = block["index"] + yield text_block + + elif block_type == "thinking": + reasoning_block: types.ReasoningContentBlock = { + "type": "reasoning", + "reasoning": block.get("thinking", ""), + } + if "index" in block: + reasoning_block["index"] = block["index"] + known_fields = {"type", "thinking", "index", "extras"} + for key in block: + if key not in known_fields: + if "extras" not in reasoning_block: + reasoning_block["extras"] = {} + reasoning_block["extras"][key] = block[key] + yield reasoning_block + + elif block_type == "tool_use": + if ( + isinstance(message, AIMessageChunk) + and len(message.tool_call_chunks) == 1 + ): + tool_call_chunk: types.ToolCallChunk = ( + message.tool_call_chunks[0].copy() # type: ignore[assignment] + ) + if "type" not in tool_call_chunk: + tool_call_chunk["type"] = "tool_call_chunk" + yield tool_call_chunk + elif ( + not isinstance(message, AIMessageChunk) + and len(message.tool_calls) == 1 + ): + tool_call_block = message.tool_calls[0] + if "index" in block: + tool_call_block["index"] = block["index"] + yield tool_call_block + else: + tool_call_block = { + "type": "tool_call", + "name": block.get("name", ""), + "args": block.get("input", {}), + "id": block.get("id", ""), + } + yield tool_call_block + + elif ( + block_type == "input_json_delta" + and isinstance(message, AIMessageChunk) + and len(message.tool_call_chunks) == 1 + ): + tool_call_chunk = ( + message.tool_call_chunks[0].copy() # type: ignore[assignment] + ) + if "type" not in tool_call_chunk: + tool_call_chunk["type"] = "tool_call_chunk" + yield tool_call_chunk + + elif block_type == "server_tool_use": + if block.get("name") == "web_search": + web_search_call: types.WebSearchCall = {"type": "web_search_call"} + + if query := block.get("input", {}).get("query"): + web_search_call["query"] = query + + elif block.get("input") == {} and "partial_json" in block: + try: + input_ = json.loads(block["partial_json"]) + if isinstance(input_, dict) and "query" in input_: + web_search_call["query"] = input_["query"] + except json.JSONDecodeError: + pass + + if "id" in block: + web_search_call["id"] = 
block["id"] + if "index" in block: + web_search_call["index"] = block["index"] + known_fields = {"type", "name", "input", "id", "index"} + for key, value in block.items(): + if key not in known_fields: + if "extras" not in web_search_call: + web_search_call["extras"] = {} + web_search_call["extras"][key] = value + yield web_search_call + + elif block.get("name") == "code_execution": + code_interpreter_call: types.CodeInterpreterCall = { + "type": "code_interpreter_call" + } + + if code := block.get("input", {}).get("code"): + code_interpreter_call["code"] = code + + elif block.get("input") == {} and "partial_json" in block: + try: + input_ = json.loads(block["partial_json"]) + if isinstance(input_, dict) and "code" in input_: + code_interpreter_call["code"] = input_["code"] + except json.JSONDecodeError: + pass + + if "id" in block: + code_interpreter_call["id"] = block["id"] + if "index" in block: + code_interpreter_call["index"] = block["index"] + known_fields = {"type", "name", "input", "id", "index"} + for key, value in block.items(): + if key not in known_fields: + if "extras" not in code_interpreter_call: + code_interpreter_call["extras"] = {} + code_interpreter_call["extras"][key] = value + yield code_interpreter_call + + else: + new_block: types.NonStandardContentBlock = { + "type": "non_standard", + "value": block, + } + if "index" in new_block["value"]: + new_block["index"] = new_block["value"].pop("index") + yield new_block + + elif block_type == "web_search_tool_result": + web_search_result: types.WebSearchResult = {"type": "web_search_result"} + if "tool_use_id" in block: + web_search_result["id"] = block["tool_use_id"] + if "index" in block: + web_search_result["index"] = block["index"] + + if web_search_result_content := block.get("content", []): + if "extras" not in web_search_result: + web_search_result["extras"] = {} + urls = [] + extra_content = [] + for result_content in web_search_result_content: + if isinstance(result_content, dict): + if "url" in result_content: + urls.append(result_content["url"]) + extra_content.append(result_content) + web_search_result["extras"]["content"] = extra_content + if urls: + web_search_result["urls"] = urls + yield web_search_result + + elif block_type == "code_execution_tool_result": + code_interpreter_result: types.CodeInterpreterResult = { + "type": "code_interpreter_result", + "output": [], + } + if "tool_use_id" in block: + code_interpreter_result["id"] = block["tool_use_id"] + if "index" in block: + code_interpreter_result["index"] = block["index"] + + code_interpreter_output: types.CodeInterpreterOutput = { + "type": "code_interpreter_output" + } + + code_execution_content = block.get("content", {}) + if code_execution_content.get("type") == "code_execution_result": + if "return_code" in code_execution_content: + code_interpreter_output["return_code"] = code_execution_content[ + "return_code" + ] + if "stdout" in code_execution_content: + code_interpreter_output["stdout"] = code_execution_content[ + "stdout" + ] + if stderr := code_execution_content.get("stderr"): + code_interpreter_output["stderr"] = stderr + if ( + output := code_interpreter_output.get("content") + ) and isinstance(output, list): + if "extras" not in code_interpreter_result: + code_interpreter_result["extras"] = {} + code_interpreter_result["extras"]["content"] = output + for output_block in output: + if "file_id" in output_block: + if "file_ids" not in code_interpreter_output: + code_interpreter_output["file_ids"] = [] + 
code_interpreter_output["file_ids"].append(
+                                    output_block["file_id"]
+                                )
+                    code_interpreter_result["output"].append(code_interpreter_output)
+
+                elif (
+                    code_execution_content.get("type")
+                    == "code_execution_tool_result_error"
+                ):
+                    if "extras" not in code_interpreter_result:
+                        code_interpreter_result["extras"] = {}
+                    code_interpreter_result["extras"]["error_code"] = (
+                        code_execution_content.get("error_code")
+                    )
+
+                yield code_interpreter_result
+
+            else:
+                new_block = {"type": "non_standard", "value": block}
+                if "index" in new_block["value"]:
+                    new_block["index"] = new_block["value"].pop("index")
+                yield new_block
+
+    return list(_iter_blocks())
+
+
 def translate_content(message: AIMessage) -> list[types.ContentBlock]:
-    """Derive standard content blocks from a message with Anthropic content."""
-    raise NotImplementedError
+    """Derive standard content blocks from a message with Anthropic content."""
+    return _convert_to_v1_from_anthropic(message)


 def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]:
-    """Derive standard content blocks from a message chunk with Anthropic content."""
-    raise NotImplementedError
+    """Derive standard content blocks from a message chunk with Anthropic content."""
+    return _convert_to_v1_from_anthropic(message)


 def _register_anthropic_translator() -> None:
diff --git a/libs/core/langchain_core/messages/block_translators/amazon/bedrock.py b/libs/core/langchain_core/messages/block_translators/bedrock.py
similarity index 55%
rename from libs/core/langchain_core/messages/block_translators/amazon/bedrock.py
rename to libs/core/langchain_core/messages/block_translators/bedrock.py
index 76467152b1028..796d45336b17f 100644
--- a/libs/core/langchain_core/messages/block_translators/amazon/bedrock.py
+++ b/libs/core/langchain_core/messages/block_translators/bedrock.py
@@ -1,16 +1,34 @@
 """Derivations of standard content blocks from Amazon (Bedrock) content."""

+import warnings
+
 from langchain_core.messages import AIMessage, AIMessageChunk
 from langchain_core.messages import content as types

+WARNED = False
+

-def translate_content(message: AIMessage) -> list[types.ContentBlock]:
+def translate_content(message: AIMessage) -> list[types.ContentBlock]:  # noqa: ARG001
     """Derive standard content blocks from a message with Bedrock content."""
+    global WARNED  # noqa: PLW0603
+    if not WARNED:
+        warning_message = (
+            "Content block standardization is not yet fully supported for Bedrock."
+        )
+        warnings.warn(warning_message, stacklevel=2)
+        WARNED = True
     raise NotImplementedError


-def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]:
+def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]:  # noqa: ARG001
     """Derive standard content blocks from a chunk with Bedrock content."""
+    global WARNED  # noqa: PLW0603
+    if not WARNED:
+        warning_message = (
+            "Content block standardization is not yet fully supported for Bedrock."
+ ) + warnings.warn(warning_message, stacklevel=2) + WARNED = True raise NotImplementedError @@ -21,9 +39,7 @@ def _register_bedrock_translator() -> None: """ from langchain_core.messages.block_translators import register_translator - register_translator( - "amazon_bedrock_chat", translate_content, translate_content_chunk - ) + register_translator("bedrock", translate_content, translate_content_chunk) _register_bedrock_translator() diff --git a/libs/core/langchain_core/messages/block_translators/amazon/bedrock_converse.py b/libs/core/langchain_core/messages/block_translators/bedrock_converse.py similarity index 54% rename from libs/core/langchain_core/messages/block_translators/amazon/bedrock_converse.py rename to libs/core/langchain_core/messages/block_translators/bedrock_converse.py index 5882ef2583bc8..6249c9107a93c 100644 --- a/libs/core/langchain_core/messages/block_translators/amazon/bedrock_converse.py +++ b/libs/core/langchain_core/messages/block_translators/bedrock_converse.py @@ -1,16 +1,36 @@ """Derivations of standard content blocks from Amazon (Bedrock Converse) content.""" +import warnings + from langchain_core.messages import AIMessage, AIMessageChunk from langchain_core.messages import content as types +WARNED = False + -def translate_content(message: AIMessage) -> list[types.ContentBlock]: +def translate_content(message: AIMessage) -> list[types.ContentBlock]: # noqa: ARG001 """Derive standard content blocks from a message with Bedrock Converse content.""" + global WARNED # noqa: PLW0603 + if not WARNED: + warning_message = ( + "Content block standardization is not yet fully supported for Bedrock " + "Converse." + ) + warnings.warn(warning_message, stacklevel=2) + WARNED = True raise NotImplementedError -def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: +def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: # noqa: ARG001 """Derive standard content blocks from a chunk with Bedrock Converse content.""" + global WARNED # noqa: PLW0603 + if not WARNED: + warning_message = ( + "Content block standardization is not yet fully supported for Bedrock " + "Converse." 
+ ) + warnings.warn(warning_message, stacklevel=2) + WARNED = True raise NotImplementedError @@ -21,9 +41,7 @@ def _register_bedrock_converse_translator() -> None: """ from langchain_core.messages.block_translators import register_translator - register_translator( - "amazon_bedrock_converse_chat", translate_content, translate_content_chunk - ) + register_translator("bedrock_converse", translate_content, translate_content_chunk) _register_bedrock_converse_translator() diff --git a/libs/core/langchain_core/messages/block_translators/chroma.py b/libs/core/langchain_core/messages/block_translators/chroma.py deleted file mode 100644 index 652aa8d0e1b0c..0000000000000 --- a/libs/core/langchain_core/messages/block_translators/chroma.py +++ /dev/null @@ -1,27 +0,0 @@ -"""Derivations of standard content blocks from Chroma content.""" - -from langchain_core.messages import AIMessage, AIMessageChunk -from langchain_core.messages import content as types - - -def translate_content(message: AIMessage) -> list[types.ContentBlock]: - """Derive standard content blocks from a message with Chroma content.""" - raise NotImplementedError - - -def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: - """Derive standard content blocks from a message chunk with Chroma content.""" - raise NotImplementedError - - -def _register_chroma_translator() -> None: - """Register the Chroma translator with the central registry. - - Run automatically when the module is imported. - """ - from langchain_core.messages.block_translators import register_translator - - register_translator("chroma", translate_content, translate_content_chunk) - - -_register_chroma_translator() diff --git a/libs/core/langchain_core/messages/block_translators/google/__init__.py b/libs/core/langchain_core/messages/block_translators/google/__init__.py deleted file mode 100644 index 0c3f0698aa2a5..0000000000000 --- a/libs/core/langchain_core/messages/block_translators/google/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Derivations of standard content blocks from Google content.""" diff --git a/libs/core/langchain_core/messages/block_translators/google/genai.py b/libs/core/langchain_core/messages/block_translators/google_genai.py similarity index 60% rename from libs/core/langchain_core/messages/block_translators/google/genai.py rename to libs/core/langchain_core/messages/block_translators/google_genai.py index b9761f94bc44a..bd4de65c3b0ce 100644 --- a/libs/core/langchain_core/messages/block_translators/google/genai.py +++ b/libs/core/langchain_core/messages/block_translators/google_genai.py @@ -1,16 +1,34 @@ """Derivations of standard content blocks from Google (GenAI) content.""" +import warnings + from langchain_core.messages import AIMessage, AIMessageChunk from langchain_core.messages import content as types +WARNED = False + -def translate_content(message: AIMessage) -> list[types.ContentBlock]: +def translate_content(message: AIMessage) -> list[types.ContentBlock]: # noqa: ARG001 """Derive standard content blocks from a message with Google (GenAI) content.""" + global WARNED # noqa: PLW0603 + if not WARNED: + warning_message = ( + "Content block standardization is not yet fully supported for Google GenAI." 
+ ) + warnings.warn(warning_message, stacklevel=2) + WARNED = True raise NotImplementedError -def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: +def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: # noqa: ARG001 """Derive standard content blocks from a chunk with Google (GenAI) content.""" + global WARNED # noqa: PLW0603 + if not WARNED: + warning_message = ( + "Content block standardization is not yet fully supported for Google GenAI." + ) + warnings.warn(warning_message, stacklevel=2) + WARNED = True raise NotImplementedError diff --git a/libs/core/langchain_core/messages/block_translators/google/vertexai.py b/libs/core/langchain_core/messages/block_translators/google_vertexai.py similarity index 59% rename from libs/core/langchain_core/messages/block_translators/google/vertexai.py rename to libs/core/langchain_core/messages/block_translators/google_vertexai.py index ae51fd4065d89..e49ee384058ee 100644 --- a/libs/core/langchain_core/messages/block_translators/google/vertexai.py +++ b/libs/core/langchain_core/messages/block_translators/google_vertexai.py @@ -1,16 +1,36 @@ """Derivations of standard content blocks from Google (VertexAI) content.""" +import warnings + from langchain_core.messages import AIMessage, AIMessageChunk from langchain_core.messages import content as types +WARNED = False + -def translate_content(message: AIMessage) -> list[types.ContentBlock]: +def translate_content(message: AIMessage) -> list[types.ContentBlock]: # noqa: ARG001 """Derive standard content blocks from a message with Google (VertexAI) content.""" + global WARNED # noqa: PLW0603 + if not WARNED: + warning_message = ( + "Content block standardization is not yet fully supported for Google " + "VertexAI." + ) + warnings.warn(warning_message, stacklevel=2) + WARNED = True raise NotImplementedError -def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: +def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: # noqa: ARG001 """Derive standard content blocks from a chunk with Google (VertexAI) content.""" + global WARNED # noqa: PLW0603 + if not WARNED: + warning_message = ( + "Content block standardization is not yet fully supported for Google " + "VertexAI." + ) + warnings.warn(warning_message, stacklevel=2) + WARNED = True raise NotImplementedError diff --git a/libs/core/langchain_core/messages/block_translators/groq.py b/libs/core/langchain_core/messages/block_translators/groq.py index 4b01dfb017f2f..6a96b1775f429 100644 --- a/libs/core/langchain_core/messages/block_translators/groq.py +++ b/libs/core/langchain_core/messages/block_translators/groq.py @@ -1,16 +1,34 @@ """Derivations of standard content blocks from Groq content.""" +import warnings + from langchain_core.messages import AIMessage, AIMessageChunk from langchain_core.messages import content as types +WARNED = False + -def translate_content(message: AIMessage) -> list[types.ContentBlock]: +def translate_content(message: AIMessage) -> list[types.ContentBlock]: # noqa: ARG001 """Derive standard content blocks from a message with Groq content.""" + global WARNED # noqa: PLW0603 + if not WARNED: + warning_message = ( + "Content block standardization is not yet fully supported for Groq." 
+ ) + warnings.warn(warning_message, stacklevel=2) + WARNED = True raise NotImplementedError -def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: +def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: # noqa: ARG001 """Derive standard content blocks from a message chunk with Groq content.""" + global WARNED # noqa: PLW0603 + if not WARNED: + warning_message = ( + "Content block standardization is not yet fully supported for Groq." + ) + warnings.warn(warning_message, stacklevel=2) + WARNED = True raise NotImplementedError diff --git a/libs/core/langchain_core/messages/block_translators/langchain.py b/libs/core/langchain_core/messages/block_translators/langchain_v0.py similarity index 89% rename from libs/core/langchain_core/messages/block_translators/langchain.py rename to libs/core/langchain_core/messages/block_translators/langchain_v0.py index 4b5e4479835a3..5fde4c0fcb0d4 100644 --- a/libs/core/langchain_core/messages/block_translators/langchain.py +++ b/libs/core/langchain_core/messages/block_translators/langchain_v0.py @@ -1,4 +1,4 @@ -"""Derivations of standard content blocks from LangChain content.""" +"""Derivations of standard content blocks from LangChain v0 multimodal content.""" from typing import Any, Union, cast @@ -21,26 +21,20 @@ def _convert_v0_multimodal_input_to_v1( Updated list with v0 blocks converted to v1 format. """ converted_blocks = [] - for block in blocks: - if ( - isinstance(block, dict) - and block.get("type") == "non_standard" - and "value" in block - and isinstance(block["value"], dict) # type: ignore[typeddict-item] - ): - # We know this is a NonStandardContentBlock, so we can safely access value - value = cast("Any", block)["value"] - # Check if this looks like v0 format - if ( - value.get("type") in {"image", "audio", "file"} - and "source_type" in value - ): - converted_block = _convert_legacy_v0_content_block_to_v1(value) - converted_blocks.append(cast("types.ContentBlock", converted_block)) - else: - converted_blocks.append(block) + unpacked_blocks: list[dict[str, Any]] = [ + cast("dict[str, Any]", block) + if block.get("type") != "non_standard" + else block["value"] # type: ignore[typeddict-item] # this is only non-standard blocks + for block in blocks + ] + for block in unpacked_blocks: + if block.get("type") in {"image", "audio", "file"} and "source_type" in block: + converted_block = _convert_legacy_v0_content_block_to_v1(block) + converted_blocks.append(cast("types.ContentBlock", converted_block)) + elif block.get("type") in types.KNOWN_BLOCK_TYPES: + converted_blocks.append(cast("types.ContentBlock", block)) else: - converted_blocks.append(block) + converted_blocks.append({"type": "non_standard", "value": block}) return converted_blocks @@ -213,7 +207,7 @@ def _extract_extras(block_dict: dict, known_keys: set[str]) -> dict[str, Any]: return types.create_image_block( # Even though this is labeled as `url`, it can be base64-encoded - base64=block["image_url"]["url"], + base64=parsed["data"], mime_type=parsed["mime_type"], **all_extras, ) @@ -278,9 +272,7 @@ def _extract_extras(block_dict: dict, known_keys: set[str]) -> dict[str, Any]: ) # base64-style file block - if (block["type"] == "file") and ( - parsed := _parse_data_uri(block["file"]["file_data"]) - ): + if block["type"] == "file": known_keys = {"type", "file"} extras = _extract_extras(block, known_keys) @@ -291,11 +283,10 @@ def _extract_extras(block_dict: dict, known_keys: set[str]) -> dict[str, Any]: for key, value in 
file_extras.items(): all_extras[f"file_{key}"] = value - mime_type = parsed["mime_type"] filename = block["file"].get("filename") return types.create_file_block( base64=block["file"]["file_data"], - mime_type=mime_type, + mime_type="application/pdf", filename=filename, **all_extras, ) diff --git a/libs/core/langchain_core/messages/block_translators/ollama.py b/libs/core/langchain_core/messages/block_translators/ollama.py index a0f41ab76342d..736ecfe06513e 100644 --- a/libs/core/langchain_core/messages/block_translators/ollama.py +++ b/libs/core/langchain_core/messages/block_translators/ollama.py @@ -1,16 +1,34 @@ """Derivations of standard content blocks from Ollama content.""" +import warnings + from langchain_core.messages import AIMessage, AIMessageChunk from langchain_core.messages import content as types +WARNED = False + -def translate_content(message: AIMessage) -> list[types.ContentBlock]: +def translate_content(message: AIMessage) -> list[types.ContentBlock]: # noqa: ARG001 """Derive standard content blocks from a message with Ollama content.""" + global WARNED # noqa: PLW0603 + if not WARNED: + warning_message = ( + "Content block standardization is not yet fully supported for Ollama." + ) + warnings.warn(warning_message, stacklevel=2) + WARNED = True raise NotImplementedError -def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: +def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]: # noqa: ARG001 """Derive standard content blocks from a message chunk with Ollama content.""" + global WARNED # noqa: PLW0603 + if not WARNED: + warning_message = ( + "Content block standardization is not yet fully supported for Ollama." + ) + warnings.warn(warning_message, stacklevel=2) + WARNED = True raise NotImplementedError diff --git a/libs/core/langchain_core/messages/block_translators/openai.py b/libs/core/langchain_core/messages/block_translators/openai.py index 029757563b44d..b11e64558aad6 100644 --- a/libs/core/langchain_core/messages/block_translators/openai.py +++ b/libs/core/langchain_core/messages/block_translators/openai.py @@ -9,7 +9,7 @@ _is_openai_data_block, ) from langchain_core.messages import content as types -from langchain_core.messages.block_translators.langchain import ( +from langchain_core.messages.block_translators.langchain_v0 import ( _convert_openai_format_to_data_block, ) @@ -52,34 +52,31 @@ def _convert_to_v1_from_chat_completions_input( from langchain_core.messages import content as types converted_blocks = [] - for block in blocks: - if ( - isinstance(block, dict) - and block.get("type") == "non_standard" - and "value" in block - and isinstance(block["value"], dict) # type: ignore[typeddict-item] - ): - # We know this is a NonStandardContentBlock, so we can safely access value - value = cast("Any", block)["value"] - # Check if this looks like OpenAI format - if value.get("type") in { - "image_url", - "input_audio", - "file", - } and _is_openai_data_block(value): - converted_block = _convert_openai_format_to_data_block(value) - # If conversion succeeded, use it; otherwise keep as non_standard - if ( - isinstance(converted_block, dict) - and converted_block.get("type") in types.KNOWN_BLOCK_TYPES - ): - converted_blocks.append(cast("types.ContentBlock", converted_block)) - else: - converted_blocks.append(block) + unpacked_blocks: list[dict[str, Any]] = [ + cast("dict[str, Any]", block) + if block.get("type") != "non_standard" + else block["value"] # type: ignore[typeddict-item] # this is only non-standard 
blocks + for block in blocks + ] + for block in unpacked_blocks: + if block.get("type") in { + "image_url", + "input_audio", + "file", + } and _is_openai_data_block(block): + converted_block = _convert_openai_format_to_data_block(block) + # If conversion succeeded, use it; otherwise keep as non_standard + if ( + isinstance(converted_block, dict) + and converted_block.get("type") in types.KNOWN_BLOCK_TYPES + ): + converted_blocks.append(cast("types.ContentBlock", converted_block)) else: - converted_blocks.append(block) + converted_blocks.append({"type": "non_standard", "value": block}) + elif block.get("type") in types.KNOWN_BLOCK_TYPES: + converted_blocks.append(cast("types.ContentBlock", block)) else: - converted_blocks.append(block) + converted_blocks.append({"type": "non_standard", "value": block}) return converted_blocks diff --git a/libs/core/langchain_core/messages/content.py b/libs/core/langchain_core/messages/content.py index 83287fb06c8b8..845c3b481cecc 100644 --- a/libs/core/langchain_core/messages/content.py +++ b/libs/core/langchain_core/messages/content.py @@ -503,12 +503,6 @@ class CodeInterpreterOutput(TypedDict): file_ids: NotRequired[list[str]] """List of file IDs generated by the code interpreter.""" - index: NotRequired[Union[int, str]] - """Index of block in aggregate response. Used during streaming.""" - - extras: NotRequired[dict[str, Any]] - """Provider-specific metadata.""" - class CodeInterpreterResult(TypedDict): """Result of a code interpreter tool call.""" @@ -886,7 +880,6 @@ class NonStandardContentBlock(TypedDict): ToolCall, ToolCallChunk, CodeInterpreterCall, - CodeInterpreterOutput, CodeInterpreterResult, WebSearchCall, WebSearchResult, @@ -918,7 +911,6 @@ class NonStandardContentBlock(TypedDict): "video", # Server-side tool calls "code_interpreter_call", - "code_interpreter_output", "code_interpreter_result", "web_search_call", "web_search_result", diff --git a/libs/core/langchain_core/utils/_merge.py b/libs/core/langchain_core/utils/_merge.py index c32b09e2e669c..7b8465e8d0256 100644 --- a/libs/core/langchain_core/utils/_merge.py +++ b/libs/core/langchain_core/utils/_merge.py @@ -116,11 +116,35 @@ def merge_lists(left: Optional[list], *others: Optional[list]) -> Optional[list] if to_merge: # TODO: Remove this once merge_dict is updated with special # handling for 'type'. 
- new_e = ( - {k: v for k, v in e.items() if k != "type"} - if "type" in e - else e - ) + if (left_type := merged[to_merge[0]].get("type")) and ( + e.get("type") == "non_standard" and "value" in e + ): + if left_type != "non_standard": + # standard + non_standard + new_e: dict[str, Any] = { + "extras": { + k: v + for k, v in e["value"].items() + if k != "type" + } + } + else: + # non_standard + non_standard + new_e = { + "value": { + k: v + for k, v in e["value"].items() + if k != "type" + } + } + if "index" in e: + new_e["index"] = e["index"] + else: + new_e = ( + {k: v for k, v in e.items() if k != "type"} + if "type" in e + else e + ) merged[to_merge[0]] = merge_dicts(merged[to_merge[0]], new_e) else: merged.append(e) diff --git a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py index 848fb75091a9f..22d8bc7907f5e 100644 --- a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py +++ b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py @@ -621,14 +621,14 @@ def test_extend_support_to_openai_multimodal_formats() -> None: "type": "input_audio", "input_audio": { "format": "wav", - "data": "data:audio/wav;base64,", + "data": "", }, }, { # file-base64 "type": "file", "file": { "filename": "draconomicon.pdf", - "file_data": "data:application/pdf;base64,", + "file_data": "", }, }, { # file-id @@ -643,12 +643,12 @@ def test_extend_support_to_openai_multimodal_formats() -> None: {"type": "text", "text": "Hello"}, # TextContentBlock { # AudioContentBlock "type": "audio", - "base64": "data:audio/wav;base64,", + "base64": "", "mime_type": "audio/wav", }, { # FileContentBlock "type": "file", - "base64": "data:application/pdf;base64,", + "base64": "", "mime_type": "application/pdf", "extras": {"filename": "draconomicon.pdf"}, }, diff --git a/libs/core/tests/unit_tests/messages/block_translators/test_anthropic.py b/libs/core/tests/unit_tests/messages/block_translators/test_anthropic.py new file mode 100644 index 0000000000000..e0f65657b99ca --- /dev/null +++ b/libs/core/tests/unit_tests/messages/block_translators/test_anthropic.py @@ -0,0 +1,439 @@ +from typing import Optional + +from langchain_core.messages import AIMessage, AIMessageChunk, HumanMessage +from langchain_core.messages import content as types + + +def test_convert_to_v1_from_anthropic() -> None: + message = AIMessage( + [ + {"type": "thinking", "thinking": "foo", "signature": "foo_signature"}, + {"type": "text", "text": "Let's call a tool."}, + { + "type": "tool_use", + "id": "abc_123", + "name": "get_weather", + "input": {"location": "San Francisco"}, + }, + { + "type": "text", + "text": "It's sunny.", + "citations": [ + { + "type": "search_result_location", + "cited_text": "The weather is sunny.", + "source": "source_123", + "title": "Document Title", + "search_result_index": 1, + "start_block_index": 0, + "end_block_index": 2, + }, + {"bar": "baz"}, + ], + }, + { + "type": "server_tool_use", + "name": "web_search", + "input": {"query": "web search query"}, + "id": "srvtoolu_abc123", + }, + { + "type": "web_search_tool_result", + "tool_use_id": "srvtoolu_abc123", + "content": [ + { + "type": "web_search_result", + "title": "Page Title 1", + "url": "", + "page_age": "January 1, 2025", + "encrypted_content": "", + }, + { + "type": "web_search_result", + "title": "Page Title 2", + "url": "", + "page_age": "January 2, 2025", + "encrypted_content": "", + }, + ], + }, + { + "type": "server_tool_use", + "id": 
"srvtoolu_def456", + "name": "code_execution", + "input": {"code": "import numpy as np..."}, + }, + { + "type": "code_execution_tool_result", + "tool_use_id": "srvtoolu_def456", + "content": { + "type": "code_execution_result", + "stdout": "Mean: 5.5\nStandard deviation...", + "stderr": "", + "return_code": 0, + }, + }, + {"type": "something_else", "foo": "bar"}, + ], + response_metadata={"model_provider": "anthropic"}, + ) + expected_content: list[types.ContentBlock] = [ + { + "type": "reasoning", + "reasoning": "foo", + "extras": {"signature": "foo_signature"}, + }, + {"type": "text", "text": "Let's call a tool."}, + { + "type": "tool_call", + "id": "abc_123", + "name": "get_weather", + "args": {"location": "San Francisco"}, + }, + { + "type": "text", + "text": "It's sunny.", + "annotations": [ + { + "type": "citation", + "title": "Document Title", + "cited_text": "The weather is sunny.", + "extras": { + "source": "source_123", + "search_result_index": 1, + "start_block_index": 0, + "end_block_index": 2, + }, + }, + {"type": "non_standard_annotation", "value": {"bar": "baz"}}, + ], + }, + { + "type": "web_search_call", + "id": "srvtoolu_abc123", + "query": "web search query", + }, + { + "type": "web_search_result", + "id": "srvtoolu_abc123", + "urls": ["", ""], + "extras": { + "content": [ + { + "type": "web_search_result", + "title": "Page Title 1", + "url": "", + "page_age": "January 1, 2025", + "encrypted_content": "", + }, + { + "type": "web_search_result", + "title": "Page Title 2", + "url": "", + "page_age": "January 2, 2025", + "encrypted_content": "", + }, + ] + }, + }, + { + "type": "code_interpreter_call", + "id": "srvtoolu_def456", + "code": "import numpy as np...", + }, + { + "type": "code_interpreter_result", + "id": "srvtoolu_def456", + "output": [ + { + "type": "code_interpreter_output", + "return_code": 0, + "stdout": "Mean: 5.5\nStandard deviation...", + } + ], + }, + { + "type": "non_standard", + "value": {"type": "something_else", "foo": "bar"}, + }, + ] + assert message.content_blocks == expected_content + + # Check no mutation + assert message.content != expected_content + + +def test_convert_to_v1_from_anthropic_chunk() -> None: + chunks = [ + AIMessageChunk( + content=[{"text": "Looking ", "type": "text", "index": 0}], + response_metadata={"model_provider": "anthropic"}, + ), + AIMessageChunk( + content=[{"text": "now.", "type": "text", "index": 0}], + response_metadata={"model_provider": "anthropic"}, + ), + AIMessageChunk( + content=[ + { + "type": "tool_use", + "name": "get_weather", + "input": {}, + "id": "toolu_abc123", + "index": 1, + } + ], + tool_call_chunks=[ + { + "type": "tool_call_chunk", + "name": "get_weather", + "args": "", + "id": "toolu_abc123", + "index": 1, + } + ], + response_metadata={"model_provider": "anthropic"}, + ), + AIMessageChunk( + content=[{"type": "input_json_delta", "partial_json": "", "index": 1}], + tool_call_chunks=[ + { + "name": None, + "args": "", + "id": None, + "index": 1, + "type": "tool_call_chunk", + } + ], + response_metadata={"model_provider": "anthropic"}, + ), + AIMessageChunk( + content=[ + {"type": "input_json_delta", "partial_json": '{"loca', "index": 1} + ], + tool_call_chunks=[ + { + "name": None, + "args": '{"loca', + "id": None, + "index": 1, + "type": "tool_call_chunk", + } + ], + response_metadata={"model_provider": "anthropic"}, + ), + AIMessageChunk( + content=[ + {"type": "input_json_delta", "partial_json": 'tion": "San ', "index": 1} + ], + tool_call_chunks=[ + { + "name": None, + "args": 'tion": "San ', 
+ "id": None, + "index": 1, + "type": "tool_call_chunk", + } + ], + response_metadata={"model_provider": "anthropic"}, + ), + AIMessageChunk( + content=[ + {"type": "input_json_delta", "partial_json": 'Francisco"}', "index": 1} + ], + tool_call_chunks=[ + { + "name": None, + "args": 'Francisco"}', + "id": None, + "index": 1, + "type": "tool_call_chunk", + } + ], + response_metadata={"model_provider": "anthropic"}, + ), + ] + expected_contents: list[types.ContentBlock] = [ + {"type": "text", "text": "Looking ", "index": 0}, + {"type": "text", "text": "now.", "index": 0}, + { + "type": "tool_call_chunk", + "name": "get_weather", + "args": "", + "id": "toolu_abc123", + "index": 1, + }, + {"name": None, "args": "", "id": None, "index": 1, "type": "tool_call_chunk"}, + { + "name": None, + "args": '{"loca', + "id": None, + "index": 1, + "type": "tool_call_chunk", + }, + { + "name": None, + "args": 'tion": "San ', + "id": None, + "index": 1, + "type": "tool_call_chunk", + }, + { + "name": None, + "args": 'Francisco"}', + "id": None, + "index": 1, + "type": "tool_call_chunk", + }, + ] + for chunk, expected in zip(chunks, expected_contents): + assert chunk.content_blocks == [expected] + + full: Optional[AIMessageChunk] = None + for chunk in chunks: + full = chunk if full is None else full + chunk + assert isinstance(full, AIMessageChunk) + + expected_content = [ + {"type": "text", "text": "Looking now.", "index": 0}, + { + "type": "tool_use", + "name": "get_weather", + "partial_json": '{"location": "San Francisco"}', + "input": {}, + "id": "toolu_abc123", + "index": 1, + }, + ] + assert full.content == expected_content + + expected_content_blocks = [ + {"type": "text", "text": "Looking now.", "index": 0}, + { + "type": "tool_call_chunk", + "name": "get_weather", + "args": '{"location": "San Francisco"}', + "id": "toolu_abc123", + "index": 1, + }, + ] + assert full.content_blocks == expected_content_blocks + + +def test_convert_to_v1_from_anthropic_input() -> None: + message = HumanMessage( + [ + {"type": "text", "text": "foo"}, + { + "type": "document", + "source": { + "type": "base64", + "data": "", + "media_type": "application/pdf", + }, + }, + { + "type": "document", + "source": { + "type": "url", + "url": "", + }, + }, + { + "type": "document", + "source": { + "type": "content", + "content": [ + {"type": "text", "text": "The grass is green"}, + {"type": "text", "text": "The sky is blue"}, + ], + }, + "citations": {"enabled": True}, + }, + { + "type": "document", + "source": { + "type": "text", + "data": "", + "media_type": "text/plain", + }, + }, + { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/jpeg", + "data": "", + }, + }, + { + "type": "image", + "source": { + "type": "url", + "url": "", + }, + }, + { + "type": "image", + "source": { + "type": "file", + "file_id": "", + }, + }, + { + "type": "document", + "source": {"type": "file", "file_id": ""}, + }, + ] + ) + + expected: list[types.ContentBlock] = [ + {"type": "text", "text": "foo"}, + { + "type": "file", + "base64": "", + "mime_type": "application/pdf", + }, + { + "type": "file", + "url": "", + }, + { + "type": "non_standard", + "value": { + "type": "document", + "source": { + "type": "content", + "content": [ + {"type": "text", "text": "The grass is green"}, + {"type": "text", "text": "The sky is blue"}, + ], + }, + "citations": {"enabled": True}, + }, + }, + { + "type": "text-plain", + "text": "", + "mime_type": "text/plain", + }, + { + "type": "image", + "base64": "", + "mime_type": "image/jpeg", + }, + 
{ + "type": "image", + "url": "", + }, + { + "type": "image", + "id": "", + }, + { + "type": "file", + "id": "", + }, + ] + + assert message.content_blocks == expected diff --git a/libs/core/tests/unit_tests/messages/block_translators/test_langchain_v0.py b/libs/core/tests/unit_tests/messages/block_translators/test_langchain_v0.py new file mode 100644 index 0000000000000..c586f134075de --- /dev/null +++ b/libs/core/tests/unit_tests/messages/block_translators/test_langchain_v0.py @@ -0,0 +1,79 @@ +from langchain_core.messages import HumanMessage +from langchain_core.messages import content as types +from tests.unit_tests.language_models.chat_models.test_base import ( + _content_blocks_equal_ignore_id, +) + + +def test_convert_to_v1_from_openai_input() -> None: + message = HumanMessage( + content=[ + {"type": "text", "text": "Hello"}, + { + "type": "image", + "source_type": "url", + "url": "https://example.com/image.png", + }, + { + "type": "image", + "source_type": "base64", + "data": "", + "mime_type": "image/png", + }, + { + "type": "file", + "source_type": "url", + "url": "", + }, + { + "type": "file", + "source_type": "base64", + "data": "", + "mime_type": "application/pdf", + }, + { + "type": "audio", + "source_type": "base64", + "data": "", + "mime_type": "audio/mpeg", + }, + { + "type": "file", + "source_type": "id", + "id": "", + }, + ] + ) + + expected: list[types.ContentBlock] = [ + {"type": "text", "text": "Hello"}, + { + "type": "image", + "url": "https://example.com/image.png", + }, + { + "type": "image", + "base64": "", + "mime_type": "image/png", + }, + { + "type": "file", + "url": "", + }, + { + "type": "file", + "base64": "", + "mime_type": "application/pdf", + }, + { + "type": "audio", + "base64": "", + "mime_type": "audio/mpeg", + }, + { + "type": "file", + "file_id": "", + }, + ] + + assert _content_blocks_equal_ignore_id(message.content_blocks, expected) diff --git a/libs/core/tests/unit_tests/messages/block_translators/test_openai.py b/libs/core/tests/unit_tests/messages/block_translators/test_openai.py index 3602d9eb08d03..2ed2086ea4443 100644 --- a/libs/core/tests/unit_tests/messages/block_translators/test_openai.py +++ b/libs/core/tests/unit_tests/messages/block_translators/test_openai.py @@ -1,41 +1,12 @@ from typing import Optional -from langchain_core.language_models.fake_chat_models import ParrotFakeChatModel -from langchain_core.messages import AIMessage, AIMessageChunk +from langchain_core.messages import AIMessage, AIMessageChunk, HumanMessage from langchain_core.messages import content as types from tests.unit_tests.language_models.chat_models.test_base import ( _content_blocks_equal_ignore_id, ) -def test_v0_to_v1_content_blocks() -> None: - llm = ParrotFakeChatModel() - messages = [ - { - "role": "user", - # v0 format - "content": [ - { - "type": "image", - "source_type": "url", - "url": "https://example.com/image.png", - } - ], - } - ] - response = llm.invoke(messages) - assert len(response.content_blocks) == 1 - expected_content_blocks = [ - { - "type": "image", - "url": "https://example.com/image.png", - } - ] - assert _content_blocks_equal_ignore_id( - response.content_blocks, expected_content_blocks - ) - - def test_convert_to_v1_from_responses() -> None: message = AIMessage( [ @@ -261,3 +232,64 @@ def test_convert_to_v1_from_responses_chunk() -> None: }, ] assert full.content_blocks == expected_content_blocks + + +def test_convert_to_v1_from_openai_input() -> None: + message = HumanMessage( + content=[ + {"type": "text", "text": "Hello"}, + { + 
"type": "image_url", + "image_url": {"url": "https://example.com/image.png"}, + }, + { + "type": "image_url", + "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQSkZJRg..."}, + }, + { + "type": "input_audio", + "input_audio": { + "format": "wav", + "data": "", + }, + }, + { + "type": "file", + "file": { + "filename": "draconomicon.pdf", + "file_data": "", + }, + }, + { + "type": "file", + "file": {"file_id": ""}, + }, + ] + ) + + expected: list[types.ContentBlock] = [ + {"type": "text", "text": "Hello"}, + { + "type": "image", + "url": "https://example.com/image.png", + }, + { + "type": "image", + "base64": "/9j/4AAQSkZJRg...", + "mime_type": "image/jpeg", + }, + { + "type": "audio", + "base64": "", + "mime_type": "audio/wav", + }, + { + "type": "file", + "base64": "", + "mime_type": "application/pdf", + "extras": {"filename": "draconomicon.pdf"}, + }, + {"type": "file", "file_id": ""}, + ] + + assert _content_blocks_equal_ignore_id(message.content_blocks, expected) diff --git a/libs/core/tests/unit_tests/messages/block_translators/test_registration.py b/libs/core/tests/unit_tests/messages/block_translators/test_registration.py new file mode 100644 index 0000000000000..74c16d30a248a --- /dev/null +++ b/libs/core/tests/unit_tests/messages/block_translators/test_registration.py @@ -0,0 +1,29 @@ +import pkgutil +from pathlib import Path + +import pytest + +from langchain_core.messages.block_translators import PROVIDER_TRANSLATORS + + +def test_all_providers_registered() -> None: + """Test that all block translators implemented in langchain-core are registered. + + If this test fails, it is likely that a block translator is implemented but not + registered on import. Check that the provider is included in + ``langchain_core.messages.block_translators.__init__._register_translators``. + """ + package_path = ( + Path(__file__).parents[4] / "langchain_core" / "messages" / "block_translators" + ) + + for module_info in pkgutil.iter_modules([str(package_path)]): + module_name = module_info.name + + # Skip the __init__ module, any private modules, and ``langchain_v0``, which is + # only used to parse v0 multimodal inputs. 
+ if module_name.startswith("_") or module_name == "langchain_v0": + continue + + if module_name not in PROVIDER_TRANSLATORS: + pytest.fail(f"Block translator not registered: {module_name}") diff --git a/libs/core/tests/unit_tests/messages/test_ai.py b/libs/core/tests/unit_tests/messages/test_ai.py index 67b0a2dc9680c..4f623c0910c87 100644 --- a/libs/core/tests/unit_tests/messages/test_ai.py +++ b/libs/core/tests/unit_tests/messages/test_ai.py @@ -1,3 +1,7 @@ +from typing import Union, cast + +import pytest + from langchain_core.load import dumpd, load from langchain_core.messages import AIMessage, AIMessageChunk from langchain_core.messages import content as types @@ -310,3 +314,92 @@ def test_content_blocks() -> None: } ] assert message.content == "" + + # Non-standard + standard_content_1: list[types.ContentBlock] = [ + {"type": "non_standard", "index": 0, "value": {"foo": "bar "}} + ] + standard_content_2: list[types.ContentBlock] = [ + {"type": "non_standard", "index": 0, "value": {"foo": "baz"}} + ] + chunk_1 = AIMessageChunk( + content=cast("Union[str, list[Union[str, dict]]]", standard_content_1) + ) + chunk_2 = AIMessageChunk( + content=cast("Union[str, list[Union[str, dict]]]", standard_content_2) + ) + merged_chunk = chunk_1 + chunk_2 + assert merged_chunk.content == [ + {"type": "non_standard", "index": 0, "value": {"foo": "bar baz"}}, + ] + + # Test non-standard + non-standard + chunk_1 = AIMessageChunk( + content=[ + { + "type": "non_standard", + "index": 0, + "value": {"type": "non_standard_tool", "foo": "bar"}, + } + ] + ) + chunk_2 = AIMessageChunk( + content=[ + { + "type": "non_standard", + "index": 0, + "value": {"type": "input_json_delta", "partial_json": "a"}, + } + ] + ) + chunk_3 = AIMessageChunk( + content=[ + { + "type": "non_standard", + "index": 0, + "value": {"type": "input_json_delta", "partial_json": "b"}, + } + ] + ) + merged_chunk = chunk_1 + chunk_2 + chunk_3 + assert merged_chunk.content == [ + { + "type": "non_standard", + "index": 0, + "value": {"type": "non_standard_tool", "foo": "bar", "partial_json": "ab"}, + } + ] + + # Test standard + non-standard with same index + standard_content_1 = [ + {"type": "web_search_call", "id": "ws_123", "query": "web query", "index": 0} + ] + standard_content_2 = [{"type": "non_standard", "value": {"foo": "bar"}, "index": 0}] + chunk_1 = AIMessageChunk( + content=cast("Union[str, list[Union[str, dict]]]", standard_content_1) + ) + chunk_2 = AIMessageChunk( + content=cast("Union[str, list[Union[str, dict]]]", standard_content_2) + ) + merged_chunk = chunk_1 + chunk_2 + assert merged_chunk.content == [ + { + "type": "web_search_call", + "id": "ws_123", + "query": "web query", + "index": 0, + "extras": {"foo": "bar"}, + } + ] + + +def test_provider_warns() -> None: + # Test that major providers warn if content block standardization is not yet + # implemented. + # This test should be removed when all major providers support content block + # standardization. 
+ message = AIMessage("Hello.", response_metadata={"model_provider": "groq"}) + with pytest.warns(match="not yet fully supported for Groq"): + content_blocks = message.content_blocks + + assert content_blocks == [{"type": "text", "text": "Hello."}] diff --git a/libs/partners/anthropic/langchain_anthropic/_compat.py b/libs/partners/anthropic/langchain_anthropic/_compat.py new file mode 100644 index 0000000000000..3b90416232406 --- /dev/null +++ b/libs/partners/anthropic/langchain_anthropic/_compat.py @@ -0,0 +1,245 @@ +from __future__ import annotations + +import json +from typing import Any, Optional, cast + +from langchain_core.messages import content as types + + +def _convert_annotation_from_v1(annotation: types.Annotation) -> dict[str, Any]: + """Right-inverse of _convert_citation_to_v1.""" + if annotation["type"] == "non_standard_annotation": + return annotation["value"] + + if annotation["type"] == "citation": + if "url" in annotation: + # web_search_result_location + out: dict[str, Any] = {} + if cited_text := annotation.get("cited_text"): + out["cited_text"] = cited_text + if "encrypted_index" in annotation.get("extras", {}): + out["encrypted_index"] = annotation["extras"]["encrypted_index"] + if "title" in annotation: + out["title"] = annotation["title"] + out["type"] = "web_search_result_location" + if "url" in annotation: + out["url"] = annotation["url"] + + for key, value in annotation.get("extras", {}).items(): + if key not in out: + out[key] = value + + return out + + if "start_char_index" in annotation.get("extras", {}): + # char_location + out = {"type": "char_location"} + for field in ["cited_text"]: + if value := annotation.get(field): + out[field] = value + if title := annotation.get("title"): + out["document_title"] = title + + for key, value in annotation.get("extras", {}).items(): + out[key] = value + + return out + + if "search_result_index" in annotation.get("extras", {}): + # search_result_location + out = {"type": "search_result_location"} + for field in ["cited_text", "title"]: + if value := annotation.get(field): + out[field] = value + + for key, value in annotation.get("extras", {}).items(): + out[key] = value + + return out + + if "start_block_index" in annotation.get("extras", {}): + # content_block_location + out = {} + if cited_text := annotation.get("cited_text"): + out["cited_text"] = cited_text + if "document_index" in annotation.get("extras", {}): + out["document_index"] = annotation["extras"]["document_index"] + if "title" in annotation: + out["document_title"] = annotation["title"] + + for key, value in annotation.get("extras", {}).items(): + if key not in out: + out[key] = value + + out["type"] = "content_block_location" + return out + + if "start_page_number" in annotation.get("extras", {}): + # page_location + out = {"type": "page_location"} + for field in ["cited_text"]: + if value := annotation.get(field): + out[field] = value + if title := annotation.get("title"): + out["document_title"] = title + + for key, value in annotation.get("extras", {}).items(): + out[key] = value + + return out + + return cast(dict[str, Any], annotation) + + return cast(dict[str, Any], annotation) + + +def _convert_from_v1_to_anthropic( + content: list[types.ContentBlock], + tool_calls: list[types.ToolCall], + model_provider: Optional[str], +) -> list[dict[str, Any]]: + new_content: list = [] + for block in content: + if block["type"] == "text": + if model_provider == "anthropic" and "annotations" in block: + new_block: dict[str, Any] = {"type": "text"} + 
new_block["citations"] = [ + _convert_annotation_from_v1(a) for a in block["annotations"] + ] + if "text" in block: + new_block["text"] = block["text"] + else: + new_block = {"text": block.get("text", ""), "type": "text"} + new_content.append(new_block) + + elif block["type"] == "tool_call": + new_content.append( + { + "type": "tool_use", + "name": block.get("name", ""), + "input": block.get("args", {}), + "id": block.get("id", ""), + } + ) + + elif block["type"] == "tool_call_chunk": + if isinstance(block["args"], str): + try: + input_ = json.loads(block["args"] or "{}") + except json.JSONDecodeError: + input_ = {} + else: + input_ = block.get("args") or {} + new_content.append( + { + "type": "tool_use", + "name": block.get("name", ""), + "input": input_, + "id": block.get("id", ""), + } + ) + + elif block["type"] == "reasoning" and model_provider == "anthropic": + new_block = {} + if "reasoning" in block: + new_block["thinking"] = block["reasoning"] + new_block["type"] = "thinking" + if signature := block.get("extras", {}).get("signature"): + new_block["signature"] = signature + + new_content.append(new_block) + + elif block["type"] == "web_search_call" and model_provider == "anthropic": + new_block = {} + if "id" in block: + new_block["id"] = block["id"] + + if (query := block.get("query")) and "input" not in block: + new_block["input"] = {"query": query} + elif input_ := block.get("extras", {}).get("input"): + new_block["input"] = input_ + elif partial_json := block.get("extras", {}).get("partial_json"): + new_block["input"] = {} + new_block["partial_json"] = partial_json + else: + pass + new_block["name"] = "web_search" + new_block["type"] = "server_tool_use" + new_content.append(new_block) + + elif block["type"] == "web_search_result" and model_provider == "anthropic": + new_block = {} + if "content" in block.get("extras", {}): + new_block["content"] = block["extras"]["content"] + if "id" in block: + new_block["tool_use_id"] = block["id"] + new_block["type"] = "web_search_tool_result" + new_content.append(new_block) + + elif block["type"] == "code_interpreter_call" and model_provider == "anthropic": + new_block = {} + if "id" in block: + new_block["id"] = block["id"] + if (code := block.get("code")) and "input" not in block: + new_block["input"] = {"code": code} + elif input_ := block.get("extras", {}).get("input"): + new_block["input"] = input_ + elif partial_json := block.get("extras", {}).get("partial_json"): + new_block["input"] = {} + new_block["partial_json"] = partial_json + else: + pass + new_block["name"] = "code_execution" + new_block["type"] = "server_tool_use" + new_content.append(new_block) + + elif ( + block["type"] == "code_interpreter_result" and model_provider == "anthropic" + ): + new_block = {} + if (output := block.get("output", [])) and len(output) == 1: + code_interpreter_output = output[0] + code_execution_content = {} + if "content" in block.get("extras", {}): + code_execution_content["content"] = block["extras"]["content"] + elif (file_ids := block.get("file_ids")) and isinstance(file_ids, list): + code_execution_content["content"] = [ + {"file_id": file_id, "type": "code_execution_output"} + for file_id in file_ids + ] + else: + code_execution_content["content"] = [] + if "return_code" in code_interpreter_output: + code_execution_content["return_code"] = code_interpreter_output[ + "return_code" + ] + code_execution_content["stderr"] = code_interpreter_output.get( + "stderr", "" + ) + if "stdout" in code_interpreter_output: + code_execution_content["stdout"] 
= code_interpreter_output["stdout"] + code_execution_content["type"] = "code_execution_result" + new_block["content"] = code_execution_content + elif "error_code" in block.get("extras", {}): + code_execution_content = { + "error_code": block["extras"]["error_code"], + "type": "code_execution_tool_result_error", + } + new_block["content"] = code_execution_content + else: + pass + if "id" in block: + new_block["tool_use_id"] = block["id"] + new_block["type"] = "code_execution_tool_result" + new_content.append(new_block) + + elif ( + block["type"] == "non_standard" + and "value" in block + and model_provider == "anthropic" + ): + new_content.append(block["value"]) + else: + new_content.append(block) + + return new_content diff --git a/libs/partners/anthropic/langchain_anthropic/chat_models.py b/libs/partners/anthropic/langchain_anthropic/chat_models.py index 641b630ddb35c..b038ba09ff5d6 100644 --- a/libs/partners/anthropic/langchain_anthropic/chat_models.py +++ b/libs/partners/anthropic/langchain_anthropic/chat_models.py @@ -33,6 +33,7 @@ ToolMessage, is_data_content_block, ) +from langchain_core.messages import content as types from langchain_core.messages.ai import InputTokenDetails, UsageMetadata from langchain_core.messages.tool import tool_call_chunk as create_tool_call_chunk from langchain_core.output_parsers import JsonOutputKeyToolsParser, PydanticToolsParser @@ -51,6 +52,7 @@ _get_default_async_httpx_client, _get_default_httpx_client, ) +from langchain_anthropic._compat import _convert_from_v1_to_anthropic from langchain_anthropic.output_parsers import extract_tool_calls _message_type_lookups = { @@ -212,7 +214,7 @@ def _merge_messages( def _format_data_content_block(block: dict) -> dict: """Format standard data content block to format expected by Anthropic.""" if block["type"] == "image": - if block["source_type"] == "url": + if "url" in block: if block["url"].startswith("data:"): # Data URI formatted_block = { @@ -224,16 +226,24 @@ def _format_data_content_block(block: dict) -> dict: "type": "image", "source": {"type": "url", "url": block["url"]}, } - elif block["source_type"] == "base64": + elif "base64" in block or block.get("source_type") == "base64": formatted_block = { "type": "image", "source": { "type": "base64", "media_type": block["mime_type"], - "data": block["data"], + "data": block.get("base64") or block.get("data", ""), }, } - elif block["source_type"] == "id": + elif "file_id" in block: + formatted_block = { + "type": "image", + "source": { + "type": "file", + "file_id": block["file_id"], + }, + } + elif block.get("source_type") == "id": formatted_block = { "type": "image", "source": { @@ -243,7 +253,7 @@ def _format_data_content_block(block: dict) -> dict: } else: msg = ( - "Anthropic only supports 'url' and 'base64' source_type for image " + "Anthropic only supports 'url', 'base64', or 'id' keys for image " "content blocks." 
) raise ValueError( @@ -251,7 +261,7 @@ def _format_data_content_block(block: dict) -> dict: ) elif block["type"] == "file": - if block["source_type"] == "url": + if "url" in block: formatted_block = { "type": "document", "source": { @@ -259,16 +269,16 @@ def _format_data_content_block(block: dict) -> dict: "url": block["url"], }, } - elif block["source_type"] == "base64": + elif "base64" in block or block.get("source_type") == "base64": formatted_block = { "type": "document", "source": { "type": "base64", "media_type": block.get("mime_type") or "application/pdf", - "data": block["data"], + "data": block.get("base64") or block.get("data", ""), }, } - elif block["source_type"] == "text": + elif block.get("source_type") == "text": formatted_block = { "type": "document", "source": { @@ -277,7 +287,15 @@ def _format_data_content_block(block: dict) -> dict: "data": block["text"], }, } - elif block["source_type"] == "id": + elif "file_id" in block: + formatted_block = { + "type": "document", + "source": { + "type": "file", + "file_id": block["file_id"], + }, + } + elif block.get("source_type") == "id": formatted_block = { "type": "document", "source": { @@ -285,6 +303,22 @@ def _format_data_content_block(block: dict) -> dict: "file_id": block["id"], }, } + else: + msg = ( + "Anthropic only supports 'url', 'base64', or 'id' keys for file " + "content blocks." + ) + raise ValueError(msg) + + elif block["type"] == "text-plain": + formatted_block = { + "type": "document", + "source": { + "type": "text", + "media_type": block.get("mime_type") or "text/plain", + "data": block["text"], + }, + } else: msg = f"Block of type {block['type']} is not supported." @@ -294,7 +328,10 @@ def _format_data_content_block(block: dict) -> dict: for key in ["cache_control", "citations", "title", "context"]: if key in block: formatted_block[key] = block[key] + elif (metadata := block.get("extras")) and key in metadata: + formatted_block[key] = metadata[key] elif (metadata := block.get("metadata")) and key in metadata: + # Backward compat formatted_block[key] = metadata[key] return formatted_block @@ -741,13 +778,11 @@ class Joke(BaseModel): }, { "type": "image", - "source_type": "base64", - "data": image_data, + "base64": image_data, "mime_type": "image/jpeg", }, { "type": "image", - "source_type": "url", "url": image_url, }, ], @@ -781,7 +816,6 @@ class Joke(BaseModel): }, { "type": "image", - "source_type": "id", "id": "file_abc123...", }, ], @@ -810,9 +844,8 @@ class Joke(BaseModel): "Summarize this document.", { "type": "file", - "source_type": "base64", "mime_type": "application/pdf", - "data": data, + "base64": data, }, ] ) @@ -846,7 +879,6 @@ class Joke(BaseModel): }, { "type": "file", - "source_type": "id", "id": "file_abc123...", }, ], @@ -1462,6 +1494,23 @@ def _get_request_payload( **kwargs: dict, ) -> dict: messages = self._convert_input(input_).to_messages() + + for idx, message in enumerate(messages): + # Translate v1 content + if ( + isinstance(message, AIMessage) + and message.response_metadata.get("output_version") == "v1" + ): + messages[idx] = message.model_copy( + update={ + "content": _convert_from_v1_to_anthropic( + cast(list[types.ContentBlock], message.content), + message.tool_calls, + message.response_metadata.get("model_provider"), + ) + } + ) + system, formatted_messages = _format_messages(messages) # If cache_control is provided in kwargs, add it to last message @@ -1626,6 +1675,7 @@ def _format_output(self, data: Any, **kwargs: Any) -> ChatResult: llm_output = { k: v for k, v in 
data_dict.items() if k not in ("content", "role", "type") } + response_metadata = {"model_provider": "anthropic"} if "model" in llm_output and "model_name" not in llm_output: llm_output["model_name"] = llm_output["model"] if ( @@ -1633,15 +1683,18 @@ def _format_output(self, data: Any, **kwargs: Any) -> ChatResult: and content[0]["type"] == "text" and not content[0].get("citations") ): - msg = AIMessage(content=content[0]["text"]) + msg = AIMessage( + content=content[0]["text"], response_metadata=response_metadata + ) elif any(block["type"] == "tool_use" for block in content): tool_calls = extract_tool_calls(content) msg = AIMessage( content=content, tool_calls=tool_calls, + response_metadata=response_metadata, ) else: - msg = AIMessage(content=content) + msg = AIMessage(content=content, response_metadata=response_metadata) msg.usage_metadata = _create_usage_metadata(data.usage) return ChatResult( generations=[ChatGeneration(message=msg)], @@ -2363,7 +2416,7 @@ def _make_message_chunk_from_anthropic_event( elif event.type == "message_delta" and stream_usage: usage_metadata = _create_usage_metadata(event.usage) message_chunk = AIMessageChunk( - content="", + content="" if coerce_content_to_string else [], usage_metadata=usage_metadata, response_metadata={ "stop_reason": event.delta.stop_reason, @@ -2375,6 +2428,8 @@ def _make_message_chunk_from_anthropic_event( else: pass + if message_chunk: + message_chunk.response_metadata["model_provider"] = "anthropic" return message_chunk, block_start_event diff --git a/libs/partners/anthropic/tests/cassettes/test_agent_loop.yaml.gz b/libs/partners/anthropic/tests/cassettes/test_agent_loop.yaml.gz new file mode 100644 index 0000000000000000000000000000000000000000..d53dffb02da7f2011eb20a1282a0470f96fe817b GIT binary patch literal 2028 zcmV7^UY;X<+e9@aN z8*GqluP=aI=f|Ix41`I@ZO+SXRq~WX-K|!)++Vkf=CbP<3<&HeX$B9I}Y`j;6%-XM4fxi^~y@eaX`%+OXl`| zpx#aA-;<>+lm->INViL7%#-#pHyAn=xt~ueb0%hP7T!${R8kX3?HD@X1mc}T&KuB>5` zJdm#W-{FYtx}%!{rldyASXs_?-H|c1_n|`?mRhOkIcWxZ(1nkU*)*xIu7z<%1Us zNvn+^YCNaE_Mt`vO;>&*M-lH6qcl>hk2`O=jZ&dxn?|lm*rpJ&EjJiwiio8J^RV_D zkuM_27F@a8^$@E(NL4W$7~jCiS$%_e5;KO-=$L9XH|R;E^`!5mF*H@St3k=;xMMJ~ zmy^4p2*#-I#)d!K`%}kskd&v3*0S+v>=~&T9^=`0*c}7B>>%MuN*(HU&AefmeXRS| zHjws9DD`dV?1iVY#)b&o;8>olux!^sx~Z8E_l$?ZUiyp=Nk^>E{=S>c?HcPSkA`tJ z*K*YpQR>rhZff=plT~+ZSbVUk0G-BL3^74eE5VrJx?;%gVjx*0U>;BtuxM}4Ola@t zi^|uv+?N+splgsMlY893g~tjoN2)&z+K&T=hag3O#m75ou-Mjk#FAyMfuWT*;|bcq zW#1upEclMya)X?qbief^fUTp{9aB$Nqzy-qVTzNb3kgfd1iN+)aon)G`hy-3%Ff`U z^46m{@qqLUw$K{1o($m_9l)H7N64M7n8(M9%Y)8 zx{#2PHL?|k*XDR|NCm&*)QT~kGLSw)*g)%9x8BXAk}!D0%nQq$8*|2uO9FR(b$MYH z6;O~aF3k%+Eti0j;Srgaq}-mB1zA=0=Kf`YN1A5E%1@KlXUFIz;TzHgxJ(k7RJ|n4 zYhYrwiBbRt``6E@77>NA`qDw(p_q literal 0 HcmV?d00001 diff --git a/libs/partners/anthropic/tests/cassettes/test_agent_loop_streaming.yaml.gz b/libs/partners/anthropic/tests/cassettes/test_agent_loop_streaming.yaml.gz new file mode 100644 index 0000000000000000000000000000000000000000..8e76e86628fef510449e73e3c8cdaef7343961f1 GIT binary patch literal 3179 zcmV-x43zU9iwFRS&ZKAp|Lt2@Z{o-nexF~Fc^auDrfn0_O{94U1RK-Ax!B5~w;p`*#*6w*JkmBuyUwXx! 
zg)=jqY7uacfO{?`C{m7Nb~}-gcLt;kjEBVsLWxdsAc?ooUHm&?$sG?9MAVHYpKDSpU<0xZL~~W9NlhMjY8Q03cX?pK z_xLipr&EJ}Rf!|m0Ey3U2!dX6O$Pl*?F=2_fpxYI91ESMD$$@bLWP6G)Bpe9K4y_{d0?z?YOI_q&8zyZ-1!>2Qa@&FUQCv1=F zY|^tg-#)A_PKVEQU+R&sD$U2Qle8~APu3$+4TZo=d7ldb3QSD!z*@j1q;Gc#5ZBI3 zM$8a5$YspFx7@f4MzE>ji)&L_5!JIBqre4r?h%9L&^DT4rU&y39onfuoNCet>rAKG zi79?x4kxBEp0ws7le7fFRJbwsEc(um!pbD0P&U?&LAE_`0RR{FLG|ebhykdO2(?n3 z;yoQb+~)v;#kndF>zDB))fhU`nv+y#=ncj~G+?$7ECOPs;Ddw_h#rDI#8DX~x=*KA zWZ(1vgLmi#cL7e9rp!-ENCbx^9Uug{?Bsg016=apMneqPI@6wkl_Q_$05U9^Vsu>VlZe3;$Tz5Mgkq_b* zM~5PGDn51(MGo2i1m{n1{siY6z&W;(C=3#7*VN!=Bx#JI;}Ek^E>aPmJs=1IpmYEf zrU)mEVDv+Ugr$r_yF?URl1t*+py)sq+iR1dbI7)+Gr@ODl7KTP%M?jAH7FC{px3ej zb!{{|nilk$LSKED^to{8bHa!iyx;`AO)MoV6Sp9Vng&Svm;(iB2>??$l*ifJ$9Cg&Nl=s~sNRlXyUGD^V(44h@p1i3r~-9ieU4cDB2V2YkW{+=q$fw#%1 zt!bIZD^=4H)h8vF5@dF`@~H90@mw?gnSTTysSp%ux@>cAxCM|9#>| z*B2>NkQF3f-Ojc~lPO$`#9#sfNDy702hiv}u>A^@`P@Pe0DuF2s`#kuOG`hlI1yB7 zyk~Vwy%ionw=u|R`L1={lrrKyc5fhS^^q^LP+M-eEl8G3*K&)>fj;k?%WdgI(Bi{! z_mJE2^L_kAobl7jok}4)Xcr`Rs_wMAcI25p2>l$|ZxleHNLmrfC<0UQ4oLN_Qek2V zOD<~!Ey@xo1Z?nxC)}~)&v8|&Uzz1)gxC$|Z|Tj+CORtIPeB`kzOxvgr+S7ZHjViAGGvXUbxpnp1 z2M%=11gCUgki66cr*vO(dBg<9G98s+z5e^ z_Pu<1SPWhW3U&FkAbBk)6!|n)Wi&Gf9e-RLM`JshKn35m-0raFCJ3u?Tk*>SmE)m= zCACQwP2ebpc+(@wDxa9K2I_OA@-%R5vZu`+yd_g2~op`Og5G4gZ7Zi8{Hh~ zy^L|xJJvf**--zB`tm9X0}UV;&(VwTaMlNxKXjrC0k+PRrK`UOSHh7>uF$PaMQhad zzbyur$eA<=dT)H>9tr+U?jjC{5E&TcDL zx66*ry5r1ZZ*{E7+ncPTu`zJrRg1okgJ$cBw3_1Y(0s#3n<%zY)pb5-^eEDONpdRR z9yO)uwq@4sQB#IXB?TWuO}T2@nd8j(-wt(dC|3_8DJ_16KyM{s?GZs6kkATA&c$&I zZ5QBDX&$4yA0Y>N+ir1BTmE1cELMfA23#>!MKjAHii*viDRW-29$Of6dyU=}0TyPJ zDXSiciH>HGfrvT6F7MaOE7wC3O;d|8>I@BcabM}RaaimEvNEhi#S7=47hW)T5`Sz+ zyMWIQKQBAop-k9E^>?CR*`KeY3Z=d#DstuFzN||(_$TGDZvW8L499A8!yt`Aul~et z>viO7yv7=%HCEk$XDzM&La^e671w?>jUht|!*{PY$#6YOeS1 zUe5O6{Y&_ci#P8sWO;k-a&9YEFJbg5&0ek2cXeq@8wL*JB-PR^sXbFjdd&CHCeVOK zp}32}IBg`Bk;QJB*PdRu+GG7P(+U=0-pC@LXK6lAi5IS6;mu~vje_4|9d5!Air7@E z#pd$y?)Aqnwy~sNm7tTW%|TD{oQWo@sp@$XYeGM$f#>|@8_UtUezo}9Z7kW#Hm*;b-*vLyMFww1sBHbX?O*VT0HA7#>6dGpxIYW4uOCJ*<9{(FsXzq7NrQJ2JQ z=sH2ahI>hWTn6No%aFSMxYA~yT81c1F*lIOm{n^xME(Ew#8(YY*QV6UEaBc3%56vY zO$mPOn4+rxV}f6>RXfywxwtFL0H8oZP_FCd*yl>HeJkPmt=gdk2kXUCj-)eQtO^|m zNpOnexATP`w{pBq8<*_?71Ut@q^Z!MvJCH7!fu1PogrTDG{**hH#kknxh5T%z<-=& z)7v)?K8~vcP!N{TgRC<=DaEhtM0l6rPquqisnvpy5Ompad=0T&`=ZDPSt$3-g()qM z<2cx3wqt^h#e?AAMp99m+f$8VK R^!FH;{tqr?1U$k%004{e5`h2! literal 0 HcmV?d00001 diff --git a/libs/partners/anthropic/tests/cassettes/test_citations.yaml.gz b/libs/partners/anthropic/tests/cassettes/test_citations.yaml.gz new file mode 100644 index 0000000000000000000000000000000000000000..c704ad451e382cecff40e25c5a842eb552961b45 GIT binary patch literal 3388 zcmV-C4a4#uiwFP^uB2!J|Lt2@Z{tQ1e$TIPo&pS@3`yDE$Uq(}*`%nk=!lYdz44Mn ziPwtML1BOVRyWx^BnMI68bl#wD{U7PIU3iRTQcn${s0N@jHZ;R%-H$#Bgg8x8?4L| z)`l+8f<`xiv7ylQF3%cNljt21sZtpHKSv#*oB(kiqhSRJ>>v*TG{y*&K&IUK?N zu9nWr!z_-&=;ix&2=MPYmVY+`@B5nm?txZtPIOO_KYNmdU71ZW(PTCz)=dU~-V^6N zao!W>J#qd&B~Gj~F_4eUT=S}XX_3he>~C*RQmT)}{aTx*3sf)&D- zHV{2Vmql0Wv=;^=(1r|)-$K)Hw=3)t5|GU5I{zU#tWafKN>#)VqP0Vj0!!z*0?OG| zDhL)!+|1=UD?ygyEyK?md=x1ukReXeBrHEIG-c|1&R@at;#zG=F@k~86d?%61>wq) zw-G=Df*?*HP-~TmC&jvV@He7`Ltm|g_iTq=ya(!e)h(FPXhLCXEdwjiRdxlXr` z{LJFqwc&vOhXqPf*tnI5h43%@EV)Sdqgx7qvzbDW7JQ0;s{i~HDJih`;j)J}6jkJB zO%^Ao@ybW3g=N20w_QXW0tA7==h*P2$ml>Yz}kIxr3&W(N(4KwDk=FXgaEiUSkoVF zYR+*H93-Ej6HyQL! 
zqyO0S$02LGLPu@7fiVuYBUjyhjxBZ|yQ09TPPraHa@?5=>!WPS`@MCh(nzy6unQrC zzGRlsl(YRzfBu>XZ||G_yhuoyO2Tf=P=*fFO;oI*8ntnvEU~F)g?o(oPOp`zL}{?5 zBwtwUNa>LlP;Fh}%QN(9uc#C?p;N8y@CmByOYey`xJ_QGkCf>nHJ(k&GE@WN%TiH5 z(LBpN{Nq%ux^dHRsA(pimk6j1&Zirfc~M?!ik%><(=wi^O!?YnmHb>0C%!yV%MnIR z{|GCdY16}?0W#6rC{4jN z72r;uP-b>?b^sC84h_Feqa7Md>>|HsDnI9AM!^1L=Q<}%RKQ~fd$ie&rHJKb2KjBS z%0i$?Ei_bEAfK9D2@y+j#&t%*gbP9UQ0*gbS2U#^XaH$of0442;Bri3Ta4j%jq*hWM(!)Mf5Y!3K`} zF)r^TRNm_Z-t@Ts4~F&<>PGD!jh4C}O`zhx6J}Ey0ks0e#bU)!Scwy{L4^VZ??L+_Ik7T z*I#4zGP~Kj(&T*E_12b_USsG@p1s+nXMJf-8m4Z}Y(%l*RrGQJU9S_#oN?@UAQEMQ zHRx!D!7^?{wh7cMPHLssj{Z{518sjDB#l)F`fS|;Ez7lI+L#p9tjM;M?ZE;OhDzx{ zM*3bJK7U+B_B2JqREXyw%{GOZ#z>R2`VD?SZlu&vFfEt*oZv~Vw4uc(jXER-n>5mZ){Bf}Dzwt>vc|o(6(&UFeti7dzdbZe>XjEOzy`|U^ zE05INTZ%2IfJfBaQfvv0$~TI~N3Q8>p$b3(vgeu(;+8mE3!thC-lto9=XF~CcQOcm z5FirH!yOJnPT{hKo9Fl>D!mF1JfNHq)!I>AF_P+~mUpQinaYkQF-XOy)2?B#V0~Ad zbv(p+-=FzzbaKo?K*UKAh$E7>*A1kLR99;IvzvY3N!Wn!pBAlY{VXLI9aaG7)UD6wy})izL0u zvKrvP=eCqONL3LXnabn+O6ofVn7t`%`uiGiEqpecu06+P>j~I~)#-Z~E%|{w-RYlb z@eghc2#B2X-TtL*J~=&=6Hwpn_m!fIodQ+PHgY##TE0p?t$=EgM;!n0^T8m7EiJw1 zhVOMsaCt$7N#WkwoC6oxc==54osf%aQZt=!q0eP1FM)J+j;l4PeBZa1FOwQSR)#iw zC7J#ABA(nFarl{ISBF*eZy_+w^#n%ipC06w#|p3sKd?2)O131rKM@MaeR7 z?{_TsJC^$$%TIa7^0Qx{9KM!`cKS$o(z#hM@Fm(FS)TpQ?$o*ZXiEJ5_G_8Grl7}q z`6GEFj!B|JPI`a>CcUra6Zz$SYE+aEsO^x7jQ8Mlps>SjO%`MqfrR5t_4 z!*ehSy6DA6-S8!E%C@Jk9?9Q4AoVGT7Yi4=_3@V;$c|7-C z+U!#bGN&G7gjQbhvgq8CA~GJj;$_h}fbGr2E_qpW?nzNGTs`#^_$qJ}O<66*E&x}u z4=&Q19KqNv;39nbo%L1OU4bQ_%FUjtA>eF*vh(RX@@_i;+=V7HJ^<8Eeng)@=_J2* z1^lJzD&nA)O<6M;*V+3CPqLJb%;lk z97xVZehUeB%r3NuGMm71Xmfhtzbt)pCG5MZ#gCQ8b_Fi8d;hOLwmY~MS9kp{KDHxP ze&POq|3aUr{KEZzu|s|%6mKg`MoKKtox=ve None: @@ -65,6 +66,9 @@ def test_stream() -> None: assert chunks_with_model_name == 1 # check token usage is populated assert isinstance(full, AIMessageChunk) + assert len(full.content_blocks) == 1 + assert full.content_blocks[0]["type"] == "text" + assert full.content_blocks[0]["text"] assert full.usage_metadata is not None assert full.usage_metadata["input_tokens"] > 0 assert full.usage_metadata["output_tokens"] > 0 @@ -105,6 +109,9 @@ async def test_astream() -> None: ) # check token usage is populated assert isinstance(full, AIMessageChunk) + assert len(full.content_blocks) == 1 + assert full.content_blocks[0]["type"] == "text" + assert full.content_blocks[0]["text"] assert full.usage_metadata is not None assert full.usage_metadata["input_tokens"] > 0 assert full.usage_metadata["output_tokens"] > 0 @@ -421,6 +428,14 @@ def test_tool_use() -> None: assert isinstance(tool_call["args"], dict) assert "location" in tool_call["args"] + content_blocks = response.content_blocks + assert len(content_blocks) == 2 + assert content_blocks[0]["type"] == "text" + assert content_blocks[0]["text"] + assert content_blocks[1]["type"] == "tool_call" + assert content_blocks[1]["name"] == "get_weather" + assert content_blocks[1]["args"] == tool_call["args"] + # Test streaming llm = ChatAnthropic( model="claude-3-7-sonnet-20250219", # type: ignore[call-arg] @@ -440,6 +455,8 @@ def test_tool_use() -> None: first = False else: gathered = gathered + chunk # type: ignore[assignment] + for block in chunk.content_blocks: + assert block["type"] in ("text", "tool_call_chunk") assert len(chunks) > 1 assert isinstance(gathered.content, list) assert len(gathered.content) == 2 @@ -461,6 +478,14 @@ def test_tool_use() -> None: assert "location" in tool_call["args"] assert tool_call["id"] is not None + content_blocks = gathered.content_blocks + assert len(content_blocks) == 2 + assert 
content_blocks[0]["type"] == "text" + assert content_blocks[0]["text"] + assert content_blocks[1]["type"] == "tool_call_chunk" + assert content_blocks[1]["name"] == "get_weather" + assert content_blocks[1]["args"] + # Testing token-efficient tools # https://docs.anthropic.com/en/docs/build-with-claude/tool-use/token-efficient-tool-use assert gathered.usage_metadata @@ -500,6 +525,13 @@ def test_builtin_tools() -> None: assert isinstance(response, AIMessage) assert response.tool_calls + content_blocks = response.content_blocks + assert len(content_blocks) == 2 + assert content_blocks[0]["type"] == "text" + assert content_blocks[0]["text"] + assert content_blocks[1]["type"] == "tool_call" + assert content_blocks[1]["name"] == "str_replace_editor" + class GenerateUsername(BaseModel): """Get a username based on someone's name and hair color.""" @@ -682,8 +714,74 @@ def test_pdf_document_input() -> None: assert len(result.content) > 0 -def test_citations() -> None: - llm = ChatAnthropic(model="claude-3-5-haiku-latest") # type: ignore[call-arg] +@pytest.mark.default_cassette("test_agent_loop.yaml.gz") +@pytest.mark.vcr +@pytest.mark.parametrize("output_version", ["v0", "v1"]) +def test_agent_loop(output_version: Literal["v0", "v1"]) -> None: + @tool + def get_weather(location: str) -> str: + """Get the weather for a location.""" + return "It's sunny." + + llm = ChatAnthropic(model="claude-3-5-haiku-latest", output_version=output_version) # type: ignore[call-arg] + llm_with_tools = llm.bind_tools([get_weather]) + input_message = HumanMessage("What is the weather in San Francisco, CA?") + tool_call_message = llm_with_tools.invoke([input_message]) + assert isinstance(tool_call_message, AIMessage) + tool_calls = tool_call_message.tool_calls + assert len(tool_calls) == 1 + tool_call = tool_calls[0] + tool_message = get_weather.invoke(tool_call) + assert isinstance(tool_message, ToolMessage) + response = llm_with_tools.invoke( + [ + input_message, + tool_call_message, + tool_message, + ] + ) + assert isinstance(response, AIMessage) + + +@pytest.mark.default_cassette("test_agent_loop_streaming.yaml.gz") +@pytest.mark.vcr +@pytest.mark.parametrize("output_version", ["v0", "v1"]) +def test_agent_loop_streaming(output_version: Literal["v0", "v1"]) -> None: + @tool + def get_weather(location: str) -> str: + """Get the weather for a location.""" + return "It's sunny." 
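    # Shared agent-loop pattern exercised by these tests (sketch of the flow below,
    # using only what the tests themselves do):
    #   1. llm_with_tools.invoke([input_message])            -> AIMessage with tool_calls
    #   2. get_weather.invoke(tool_call)                      -> ToolMessage
    #   3. llm_with_tools.invoke([input, ai_msg, tool_msg])   -> final AIMessage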
+ + llm = ChatAnthropic( + model="claude-3-5-haiku-latest", + streaming=True, + output_version=output_version, # type: ignore[call-arg] + ) + llm_with_tools = llm.bind_tools([get_weather]) + input_message = HumanMessage("What is the weather in San Francisco, CA?") + tool_call_message = llm_with_tools.invoke([input_message]) + assert isinstance(tool_call_message, AIMessage) + + tool_calls = tool_call_message.tool_calls + assert len(tool_calls) == 1 + tool_call = tool_calls[0] + tool_message = get_weather.invoke(tool_call) + assert isinstance(tool_message, ToolMessage) + response = llm_with_tools.invoke( + [ + input_message, + tool_call_message, + tool_message, + ] + ) + assert isinstance(response, AIMessage) + + +@pytest.mark.default_cassette("test_citations.yaml.gz") +@pytest.mark.vcr +@pytest.mark.parametrize("output_version", ["v0", "v1"]) +def test_citations(output_version: Literal["v0", "v1"]) -> None: + llm = ChatAnthropic(model="claude-3-5-haiku-latest", output_version=output_version) # type: ignore[call-arg] messages = [ { "role": "user", @@ -706,7 +804,10 @@ def test_citations() -> None: response = llm.invoke(messages) assert isinstance(response, AIMessage) assert isinstance(response.content, list) - assert any("citations" in block for block in response.content) + if output_version == "v1": + assert any("annotations" in block for block in response.content) + else: + assert any("citations" in block for block in response.content) # Test streaming full: Optional[BaseMessageChunk] = None @@ -714,8 +815,11 @@ def test_citations() -> None: full = cast(BaseMessageChunk, chunk) if full is None else full + chunk assert isinstance(full, AIMessageChunk) assert isinstance(full.content, list) - assert any("citations" in block for block in full.content) assert not any("citation" in block for block in full.content) + if output_version == "v1": + assert any("annotations" in block for block in full.content) + else: + assert any("citations" in block for block in full.content) # Test pass back in next_message = { @@ -762,25 +866,77 @@ def test_thinking() -> None: _ = llm.invoke([input_message, full, next_message]) +@pytest.mark.default_cassette("test_thinking.yaml.gz") @pytest.mark.vcr -def test_redacted_thinking() -> None: +def test_thinking_v1() -> None: llm = ChatAnthropic( model="claude-3-7-sonnet-latest", # type: ignore[call-arg] max_tokens=5_000, # type: ignore[call-arg] thinking={"type": "enabled", "budget_tokens": 2_000}, + output_version="v1", + ) + + input_message = {"role": "user", "content": "Hello"} + response = llm.invoke([input_message]) + assert any("reasoning" in block for block in response.content) + for block in response.content: + assert isinstance(block, dict) + if block["type"] == "reasoning": + assert set(block.keys()) == {"type", "reasoning", "extras"} + assert block["reasoning"] and isinstance(block["reasoning"], str) + signature = block["extras"]["signature"] + assert signature and isinstance(signature, str) + + # Test streaming + full: Optional[BaseMessageChunk] = None + for chunk in llm.stream([input_message]): + full = cast(BaseMessageChunk, chunk) if full is None else full + chunk + assert isinstance(full, AIMessageChunk) + assert isinstance(full.content, list) + assert any("reasoning" in block for block in full.content) + for block in full.content: + assert isinstance(block, dict) + if block["type"] == "reasoning": + assert set(block.keys()) == {"type", "reasoning", "extras", "index"} + assert block["reasoning"] and isinstance(block["reasoning"], str) + signature = 
block["extras"]["signature"] + assert signature and isinstance(signature, str) + + # Test pass back in + next_message = {"role": "user", "content": "How are you?"} + _ = llm.invoke([input_message, full, next_message]) + + +@pytest.mark.default_cassette("test_redacted_thinking.yaml.gz") +@pytest.mark.vcr +@pytest.mark.parametrize("output_version", ["v0", "v1"]) +def test_redacted_thinking(output_version: Literal["v0", "v1"]) -> None: + llm = ChatAnthropic( + model="claude-3-7-sonnet-latest", # type: ignore[call-arg] + max_tokens=5_000, # type: ignore[call-arg] + thinking={"type": "enabled", "budget_tokens": 2_000}, + output_version=output_version, ) query = "ANTHROPIC_MAGIC_STRING_TRIGGER_REDACTED_THINKING_46C9A13E193C177646C7398A98432ECCCE4C1253D5E2D82641AC0E52CC2876CB" # noqa: E501 input_message = {"role": "user", "content": query} response = llm.invoke([input_message]) - has_reasoning = False + value = None for block in response.content: assert isinstance(block, dict) if block["type"] == "redacted_thinking": - has_reasoning = True - assert set(block.keys()) == {"type", "data"} - assert block["data"] and isinstance(block["data"], str) - assert has_reasoning + value = block + elif ( + block["type"] == "non_standard" + and block["value"]["type"] == "redacted_thinking" + ): + value = block["value"] + else: + pass + if value: + assert set(value.keys()) == {"type", "data"} + assert value["data"] and isinstance(value["data"], str) + assert value is not None # Test streaming full: Optional[BaseMessageChunk] = None @@ -788,14 +944,25 @@ def test_redacted_thinking() -> None: full = cast(BaseMessageChunk, chunk) if full is None else full + chunk assert isinstance(full, AIMessageChunk) assert isinstance(full.content, list) - stream_has_reasoning = False + value = None for block in full.content: assert isinstance(block, dict) if block["type"] == "redacted_thinking": - stream_has_reasoning = True - assert set(block.keys()) == {"type", "data", "index"} - assert block["data"] and isinstance(block["data"], str) - assert stream_has_reasoning + value = block + assert set(value.keys()) == {"type", "data", "index"} + assert "index" in block + elif ( + block["type"] == "non_standard" + and block["value"]["type"] == "redacted_thinking" + ): + value = block["value"] + assert set(value.keys()) == {"type", "data"} + assert "index" in block + else: + pass + if value: + assert value["data"] and isinstance(value["data"], str) + assert value is not None # Test pass back in next_message = {"role": "user", "content": "What?"} @@ -899,11 +1066,14 @@ class color_picker(BaseModel): llm.bind_tools([color_picker]).invoke(messages) +@pytest.mark.default_cassette("test_web_search.yaml.gz") @pytest.mark.vcr -def test_web_search() -> None: +@pytest.mark.parametrize("output_version", ["v0", "v1"]) +def test_web_search(output_version: Literal["v0", "v1"]) -> None: llm = ChatAnthropic( model="claude-3-5-sonnet-latest", # type: ignore[call-arg] max_tokens=1024, + output_version=output_version, ) tool = {"type": "web_search_20250305", "name": "web_search", "max_uses": 1} @@ -921,7 +1091,10 @@ def test_web_search() -> None: response = llm_with_tools.invoke([input_message]) assert all(isinstance(block, dict) for block in response.content) block_types = {block["type"] for block in response.content} # type: ignore[index] - assert block_types == {"text", "server_tool_use", "web_search_tool_result"} + if output_version == "v0": + assert block_types == {"text", "server_tool_use", "web_search_tool_result"} + else: + assert block_types == 
{"text", "web_search_call", "web_search_result"} # Test streaming full: Optional[BaseMessageChunk] = None @@ -931,7 +1104,10 @@ def test_web_search() -> None: assert isinstance(full, AIMessageChunk) assert isinstance(full.content, list) block_types = {block["type"] for block in full.content} # type: ignore[index] - assert block_types == {"text", "server_tool_use", "web_search_tool_result"} + if output_version == "v0": + assert block_types == {"text", "server_tool_use", "web_search_tool_result"} + else: + assert block_types == {"text", "web_search_call", "web_search_result"} # Test we can pass back in next_message = { @@ -943,12 +1119,15 @@ def test_web_search() -> None: ) +@pytest.mark.default_cassette("test_code_execution.yaml.gz") @pytest.mark.vcr -def test_code_execution() -> None: +@pytest.mark.parametrize("output_version", ["v0", "v1"]) +def test_code_execution(output_version: Literal["v0", "v1"]) -> None: llm = ChatAnthropic( model="claude-sonnet-4-20250514", # type: ignore[call-arg] betas=["code-execution-2025-05-22"], max_tokens=10_000, # type: ignore[call-arg] + output_version=output_version, ) tool = {"type": "code_execution_20250522", "name": "code_execution"} @@ -969,7 +1148,14 @@ def test_code_execution() -> None: response = llm_with_tools.invoke([input_message]) assert all(isinstance(block, dict) for block in response.content) block_types = {block["type"] for block in response.content} # type: ignore[index] - assert block_types == {"text", "server_tool_use", "code_execution_tool_result"} + if output_version == "v0": + assert block_types == {"text", "server_tool_use", "code_execution_tool_result"} + else: + assert block_types == { + "text", + "code_interpreter_call", + "code_interpreter_result", + } # Test streaming full: Optional[BaseMessageChunk] = None @@ -979,7 +1165,14 @@ def test_code_execution() -> None: assert isinstance(full, AIMessageChunk) assert isinstance(full.content, list) block_types = {block["type"] for block in full.content} # type: ignore[index] - assert block_types == {"text", "server_tool_use", "code_execution_tool_result"} + if output_version == "v0": + assert block_types == {"text", "server_tool_use", "code_execution_tool_result"} + else: + assert block_types == { + "text", + "code_interpreter_call", + "code_interpreter_result", + } # Test we can pass back in next_message = { @@ -991,8 +1184,10 @@ def test_code_execution() -> None: ) +@pytest.mark.default_cassette("test_remote_mcp.yaml.gz") @pytest.mark.vcr -def test_remote_mcp() -> None: +@pytest.mark.parametrize("output_version", ["v0", "v1"]) +def test_remote_mcp(output_version: Literal["v0", "v1"]) -> None: mcp_servers = [ { "type": "url", @@ -1008,6 +1203,7 @@ def test_remote_mcp() -> None: betas=["mcp-client-2025-04-04"], mcp_servers=mcp_servers, max_tokens=10_000, # type: ignore[call-arg] + output_version=output_version, ) input_message = { @@ -1025,7 +1221,10 @@ def test_remote_mcp() -> None: response = llm.invoke([input_message]) assert all(isinstance(block, dict) for block in response.content) block_types = {block["type"] for block in response.content} # type: ignore[index] - assert block_types == {"text", "mcp_tool_use", "mcp_tool_result"} + if output_version == "v0": + assert block_types == {"text", "mcp_tool_use", "mcp_tool_result"} + else: + assert block_types == {"text", "non_standard"} # Test streaming full: Optional[BaseMessageChunk] = None @@ -1036,7 +1235,10 @@ def test_remote_mcp() -> None: assert isinstance(full.content, list) assert all(isinstance(block, dict) for block in 
full.content) block_types = {block["type"] for block in full.content} # type: ignore[index] - assert block_types == {"text", "mcp_tool_use", "mcp_tool_result"} + if output_version == "v0": + assert block_types == {"text", "mcp_tool_use", "mcp_tool_result"} + else: + assert block_types == {"text", "non_standard"} # Test we can pass back in next_message = { @@ -1069,8 +1271,7 @@ def test_files_api_image(block_format: str) -> None: # standard block format block = { "type": "image", - "source_type": "id", - "id": image_file_id, + "file_id": image_file_id, } input_message = { "role": "user", @@ -1097,8 +1298,7 @@ def test_files_api_pdf(block_format: str) -> None: # standard block format block = { "type": "file", - "source_type": "id", - "id": pdf_file_id, + "file_id": pdf_file_id, } input_message = { "role": "user", @@ -1163,6 +1363,11 @@ def retrieval_tool(query: str) -> list[dict]: assert isinstance(result.content, list) assert any("citations" in block for block in result.content) + assert ( + _convert_from_v1_to_anthropic(result.content_blocks, [], "anthropic") + == result.content + ) + def test_search_result_top_level() -> None: llm = ChatAnthropic( @@ -1209,6 +1414,11 @@ def test_search_result_top_level() -> None: assert isinstance(result.content, list) assert any("citations" in block for block in result.content) + assert ( + _convert_from_v1_to_anthropic(result.content_blocks, [], "anthropic") + == result.content + ) + def test_async_shared_client() -> None: llm = ChatAnthropic(model="claude-3-5-haiku-latest") # type: ignore[call-arg] diff --git a/libs/partners/anthropic/tests/unit_tests/__snapshots__/test_standard.ambr b/libs/partners/anthropic/tests/unit_tests/__snapshots__/test_standard.ambr index b831aef469b44..5c9164caae365 100644 --- a/libs/partners/anthropic/tests/unit_tests/__snapshots__/test_standard.ambr +++ b/libs/partners/anthropic/tests/unit_tests/__snapshots__/test_standard.ambr @@ -20,6 +20,7 @@ 'max_retries': 2, 'max_tokens': 100, 'model': 'claude-3-haiku-20240307', + 'output_version': 'v0', 'stop_sequences': list([ ]), 'stream_usage': True, diff --git a/libs/partners/anthropic/tests/unit_tests/test_chat_models.py b/libs/partners/anthropic/tests/unit_tests/test_chat_models.py index 382d2f774c5dc..3cf2b0e44ee27 100644 --- a/libs/partners/anthropic/tests/unit_tests/test_chat_models.py +++ b/libs/partners/anthropic/tests/unit_tests/test_chat_models.py @@ -211,6 +211,7 @@ def test__format_output() -> None: "total_tokens": 3, "input_token_details": {}, }, + response_metadata={"model_provider": "anthropic"}, ) llm = ChatAnthropic(model="test", anthropic_api_key="test") # type: ignore[call-arg, call-arg] actual = llm._format_output(anthropic_msg) @@ -241,6 +242,7 @@ def test__format_output_cached() -> None: "total_tokens": 10, "input_token_details": {"cache_creation": 3, "cache_read": 4}, }, + response_metadata={"model_provider": "anthropic"}, ) llm = ChatAnthropic(model="test", anthropic_api_key="test") # type: ignore[call-arg, call-arg] @@ -849,7 +851,7 @@ def test__format_messages_with_cache_control() -> None: assert expected_system == actual_system assert expected_messages == actual_messages - # Test standard multi-modal format + # Test standard multi-modal format (v0) messages = [ HumanMessage( [ @@ -891,6 +893,183 @@ def test__format_messages_with_cache_control() -> None: ] assert actual_messages == expected_messages + # Test standard multi-modal format (v1) + messages = [ + HumanMessage( + [ + { + "type": "text", + "text": "Summarize this document:", + }, + { + "type": 
"file", + "mime_type": "application/pdf", + "base64": "", + "extras": {"cache_control": {"type": "ephemeral"}}, + }, + ], + ), + ] + actual_system, actual_messages = _format_messages(messages) + assert actual_system is None + expected_messages = [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Summarize this document:", + }, + { + "type": "document", + "source": { + "type": "base64", + "media_type": "application/pdf", + "data": "", + }, + "cache_control": {"type": "ephemeral"}, + }, + ], + }, + ] + assert actual_messages == expected_messages + + # Test standard multi-modal format (v1, unpacked extras) + messages = [ + HumanMessage( + [ + { + "type": "text", + "text": "Summarize this document:", + }, + { + "type": "file", + "mime_type": "application/pdf", + "base64": "", + "cache_control": {"type": "ephemeral"}, + }, + ], + ), + ] + actual_system, actual_messages = _format_messages(messages) + assert actual_system is None + expected_messages = [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Summarize this document:", + }, + { + "type": "document", + "source": { + "type": "base64", + "media_type": "application/pdf", + "data": "", + }, + "cache_control": {"type": "ephemeral"}, + }, + ], + }, + ] + assert actual_messages == expected_messages + + # Also test file inputs + ## Images + for block in [ + # v1 + { + "type": "image", + "file_id": "abc123", + }, + # v0 + { + "type": "image", + "source_type": "id", + "id": "abc123", + }, + ]: + messages = [ + HumanMessage( + [ + { + "type": "text", + "text": "Summarize this image:", + }, + block, + ], + ), + ] + actual_system, actual_messages = _format_messages(messages) + assert actual_system is None + expected_messages = [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Summarize this image:", + }, + { + "type": "image", + "source": { + "type": "file", + "file_id": "abc123", + }, + }, + ], + }, + ] + assert actual_messages == expected_messages + + ## Documents + for block in [ + # v1 + { + "type": "file", + "file_id": "abc123", + }, + # v0 + { + "type": "file", + "source_type": "id", + "id": "abc123", + }, + ]: + messages = [ + HumanMessage( + [ + { + "type": "text", + "text": "Summarize this document:", + }, + block, + ], + ), + ] + actual_system, actual_messages = _format_messages(messages) + assert actual_system is None + expected_messages = [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Summarize this document:", + }, + { + "type": "document", + "source": { + "type": "file", + "file_id": "abc123", + }, + }, + ], + }, + ] + assert actual_messages == expected_messages + def test__format_messages_with_citations() -> None: input_messages = [ From 62d746e63096480e802208468d9c40ce00af1ea0 Mon Sep 17 00:00:00 2001 From: ccurme Date: Mon, 25 Aug 2025 10:37:41 -0300 Subject: [PATCH 45/73] feat(core): (v1) restore separate type for AIMessage.tool_calls (#32668) --- libs/core/langchain_core/messages/ai.py | 4 +- .../messages/block_translators/anthropic.py | 7 +- .../messages/block_translators/openai.py | 16 +- libs/core/langchain_core/messages/tool.py | 32 ++- .../core/tests/unit_tests/messages/test_ai.py | 10 + .../prompts/__snapshots__/test_chat.ambr | 50 +---- .../runnables/__snapshots__/test_graph.ambr | 25 +-- .../__snapshots__/test_runnable.ambr | 200 +----------------- .../langchain_anthropic/chat_models.py | 11 +- .../langchain_openai/chat_models/base.py | 14 +- .../tests/unit_tests/chat_models/test_base.py | 14 +- 11 files changed, 106 insertions(+), 277 
deletions(-) diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index 31be4dbca4e0a..cc0e532f7c897 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -255,9 +255,9 @@ def content_blocks(self) -> list[types.ContentBlock]: "args": tool_call["args"], } if "index" in tool_call: - tool_call_block["index"] = tool_call["index"] + tool_call_block["index"] = tool_call["index"] # type: ignore[typeddict-item] if "extras" in tool_call: - tool_call_block["extras"] = tool_call["extras"] + tool_call_block["extras"] = tool_call["extras"] # type: ignore[typeddict-item] blocks.append(tool_call_block) return blocks diff --git a/libs/core/langchain_core/messages/block_translators/anthropic.py b/libs/core/langchain_core/messages/block_translators/anthropic.py index 8f0b3919fa452..655cfaa2857ef 100644 --- a/libs/core/langchain_core/messages/block_translators/anthropic.py +++ b/libs/core/langchain_core/messages/block_translators/anthropic.py @@ -237,7 +237,12 @@ def _iter_blocks() -> Iterable[types.ContentBlock]: not isinstance(message, AIMessageChunk) and len(message.tool_calls) == 1 ): - tool_call_block = message.tool_calls[0] + tool_call_block: types.ToolCall = { + "type": "tool_call", + "name": message.tool_calls[0]["name"], + "args": message.tool_calls[0]["args"], + "id": message.tool_calls[0].get("id"), + } if "index" in block: tool_call_block["index"] = block["index"] yield tool_call_block diff --git a/libs/core/langchain_core/messages/block_translators/openai.py b/libs/core/langchain_core/messages/block_translators/openai.py index b11e64558aad6..a9e5db98d9d54 100644 --- a/libs/core/langchain_core/messages/block_translators/openai.py +++ b/libs/core/langchain_core/messages/block_translators/openai.py @@ -30,7 +30,14 @@ def _convert_to_v1_from_chat_completions( content_blocks = [] for tool_call in message.tool_calls: - content_blocks.append(tool_call) + content_blocks.append( + { + "type": "tool_call", + "name": tool_call["name"], + "args": tool_call["args"], + "id": tool_call.get("id"), + } + ) return content_blocks @@ -287,7 +294,12 @@ def _iter_blocks() -> Iterable[types.ContentBlock]: elif call_id: for tool_call in message.tool_calls or []: if tool_call.get("id") == call_id: - tool_call_block = tool_call.copy() + tool_call_block = { + "type": "tool_call", + "name": tool_call["name"], + "args": tool_call["args"], + "id": tool_call.get("id"), + } break else: for invalid_tool_call in message.invalid_tool_calls or []: diff --git a/libs/core/langchain_core/messages/tool.py b/libs/core/langchain_core/messages/tool.py index fab0315de63ca..f55be142672b7 100644 --- a/libs/core/langchain_core/messages/tool.py +++ b/libs/core/langchain_core/messages/tool.py @@ -10,7 +10,6 @@ from langchain_core.messages import content as types from langchain_core.messages.base import BaseMessage, BaseMessageChunk, merge_content from langchain_core.messages.content import InvalidToolCall as InvalidToolCall -from langchain_core.messages.content import ToolCall as ToolCall from langchain_core.utils._merge import merge_dicts, merge_obj @@ -199,6 +198,37 @@ def __add__(self, other: Any) -> BaseMessageChunk: # type: ignore[override] return super().__add__(other) +class ToolCall(TypedDict): + """Represents a request to call a tool. + + Example: + + .. code-block:: python + + { + "name": "foo", + "args": {"a": 1}, + "id": "123" + } + + This represents a request to call the tool named "foo" with arguments {"a": 1} + and an identifier of "123". 
+ + """ + + name: str + """The name of the tool to be called.""" + args: dict[str, Any] + """The arguments to the tool call.""" + id: Optional[str] + """An identifier associated with the tool call. + + An identifier is needed to associate a tool call request with a tool + call result in events when multiple concurrent tool calls are made. + """ + type: NotRequired[Literal["tool_call"]] + + def tool_call( *, name: str, diff --git a/libs/core/tests/unit_tests/messages/test_ai.py b/libs/core/tests/unit_tests/messages/test_ai.py index 4f623c0910c87..a0edf0b5714bc 100644 --- a/libs/core/tests/unit_tests/messages/test_ai.py +++ b/libs/core/tests/unit_tests/messages/test_ai.py @@ -203,6 +203,16 @@ def test_add_ai_message_chunks_usage() -> None: ) +def test_init_tool_calls() -> None: + # Test we add "type" key on init + msg = AIMessage("", tool_calls=[{"name": "foo", "args": {"a": "b"}, "id": "abc"}]) + assert len(msg.tool_calls) == 1 + assert msg.tool_calls[0]["type"] == "tool_call" + + # Test we can assign without adding type key + msg.tool_calls = [{"name": "bar", "args": {"c": "d"}, "id": "def"}] + + def test_content_blocks() -> None: message = AIMessage( "", diff --git a/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr b/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr index 1ff3d7aec133f..7b28b1523c10d 100644 --- a/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr +++ b/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr @@ -1014,23 +1014,12 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". - - .. note:: - ``create_tool_call`` may also be used as a factory to create a - ``ToolCall``. Benefits include: - - * Automatic ID generation (when not provided) - * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), - 'extras': dict({ - 'title': 'Extras', - 'type': 'object', - }), 'id': dict({ 'anyOf': list([ dict({ @@ -1042,17 +1031,6 @@ ]), 'title': 'Id', }), - 'index': dict({ - 'anyOf': list([ - dict({ - 'type': 'integer', - }), - dict({ - 'type': 'string', - }), - ]), - 'title': 'Index', - }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -1064,10 +1042,9 @@ }), }), 'required': list([ - 'type', - 'id', 'name', 'args', + 'id', ]), 'title': 'ToolCall', 'type': 'object', @@ -2485,23 +2462,12 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". - - .. note:: - ``create_tool_call`` may also be used as a factory to create a - ``ToolCall``. 
Benefits include: - - * Automatic ID generation (when not provided) - * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), - 'extras': dict({ - 'title': 'Extras', - 'type': 'object', - }), 'id': dict({ 'anyOf': list([ dict({ @@ -2513,17 +2479,6 @@ ]), 'title': 'Id', }), - 'index': dict({ - 'anyOf': list([ - dict({ - 'type': 'integer', - }), - dict({ - 'type': 'string', - }), - ]), - 'title': 'Index', - }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -2535,10 +2490,9 @@ }), }), 'required': list([ - 'type', - 'id', 'name', 'args', + 'id', ]), 'title': 'ToolCall', 'type': 'object', diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr index 4f6c54a28d452..3d9e96918bfc5 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr @@ -1417,23 +1417,12 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". - - .. note:: - ``create_tool_call`` may also be used as a factory to create a - ``ToolCall``. Benefits include: - - * Automatic ID generation (when not provided) - * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), - 'extras': dict({ - 'title': 'Extras', - 'type': 'object', - }), 'id': dict({ 'anyOf': list([ dict({ @@ -1445,17 +1434,6 @@ ]), 'title': 'Id', }), - 'index': dict({ - 'anyOf': list([ - dict({ - 'type': 'integer', - }), - dict({ - 'type': 'string', - }), - ]), - 'title': 'Index', - }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -1467,10 +1445,9 @@ }), }), 'required': list([ - 'type', - 'id', 'name', 'args', + 'id', ]), 'title': 'ToolCall', 'type': 'object', diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr index 04bab565b38b3..ab07b2c2c0991 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr @@ -2959,23 +2959,12 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". - - .. note:: - ``create_tool_call`` may also be used as a factory to create a - ``ToolCall``. Benefits include: - - * Automatic ID generation (when not provided) - * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), - 'extras': dict({ - 'title': 'Extras', - 'type': 'object', - }), 'id': dict({ 'anyOf': list([ dict({ @@ -2987,17 +2976,6 @@ ]), 'title': 'Id', }), - 'index': dict({ - 'anyOf': list([ - dict({ - 'type': 'integer', - }), - dict({ - 'type': 'string', - }), - ]), - 'title': 'Index', - }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -3008,10 +2986,9 @@ }), }), 'required': list([ - 'type', - 'id', 'name', 'args', + 'id', ]), 'title': 'ToolCall', 'type': 'object', @@ -4493,23 +4470,12 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". - - .. note:: - ``create_tool_call`` may also be used as a factory to create a - ``ToolCall``. 
Benefits include: - - * Automatic ID generation (when not provided) - * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), - 'extras': dict({ - 'title': 'Extras', - 'type': 'object', - }), 'id': dict({ 'anyOf': list([ dict({ @@ -4521,17 +4487,6 @@ ]), 'title': 'Id', }), - 'index': dict({ - 'anyOf': list([ - dict({ - 'type': 'integer', - }), - dict({ - 'type': 'string', - }), - ]), - 'title': 'Index', - }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -4542,10 +4497,9 @@ }), }), 'required': list([ - 'type', - 'id', 'name', 'args', + 'id', ]), 'title': 'ToolCall', 'type': 'object', @@ -6039,23 +5993,12 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". - - .. note:: - ``create_tool_call`` may also be used as a factory to create a - ``ToolCall``. Benefits include: - - * Automatic ID generation (when not provided) - * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), - 'extras': dict({ - 'title': 'Extras', - 'type': 'object', - }), 'id': dict({ 'anyOf': list([ dict({ @@ -6067,17 +6010,6 @@ ]), 'title': 'Id', }), - 'index': dict({ - 'anyOf': list([ - dict({ - 'type': 'integer', - }), - dict({ - 'type': 'string', - }), - ]), - 'title': 'Index', - }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -6088,10 +6020,9 @@ }), }), 'required': list([ - 'type', - 'id', 'name', 'args', + 'id', ]), 'title': 'ToolCall', 'type': 'object', @@ -7441,23 +7372,12 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". - - .. note:: - ``create_tool_call`` may also be used as a factory to create a - ``ToolCall``. Benefits include: - - * Automatic ID generation (when not provided) - * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), - 'extras': dict({ - 'title': 'Extras', - 'type': 'object', - }), 'id': dict({ 'anyOf': list([ dict({ @@ -7469,17 +7389,6 @@ ]), 'title': 'Id', }), - 'index': dict({ - 'anyOf': list([ - dict({ - 'type': 'integer', - }), - dict({ - 'type': 'string', - }), - ]), - 'title': 'Index', - }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -7490,10 +7399,9 @@ }), }), 'required': list([ - 'type', - 'id', 'name', 'args', + 'id', ]), 'title': 'ToolCall', 'type': 'object', @@ -9017,23 +8925,12 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". - - .. note:: - ``create_tool_call`` may also be used as a factory to create a - ``ToolCall``. Benefits include: - - * Automatic ID generation (when not provided) - * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), - 'extras': dict({ - 'title': 'Extras', - 'type': 'object', - }), 'id': dict({ 'anyOf': list([ dict({ @@ -9045,17 +8942,6 @@ ]), 'title': 'Id', }), - 'index': dict({ - 'anyOf': list([ - dict({ - 'type': 'integer', - }), - dict({ - 'type': 'string', - }), - ]), - 'title': 'Index', - }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -9066,10 +8952,9 @@ }), }), 'required': list([ - 'type', - 'id', 'name', 'args', + 'id', ]), 'title': 'ToolCall', 'type': 'object', @@ -10464,23 +10349,12 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". - - .. 
note:: - ``create_tool_call`` may also be used as a factory to create a - ``ToolCall``. Benefits include: - - * Automatic ID generation (when not provided) - * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), - 'extras': dict({ - 'title': 'Extras', - 'type': 'object', - }), 'id': dict({ 'anyOf': list([ dict({ @@ -10492,17 +10366,6 @@ ]), 'title': 'Id', }), - 'index': dict({ - 'anyOf': list([ - dict({ - 'type': 'integer', - }), - dict({ - 'type': 'string', - }), - ]), - 'title': 'Index', - }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -10513,10 +10376,9 @@ }), }), 'required': list([ - 'type', - 'id', 'name', 'args', + 'id', ]), 'title': 'ToolCall', 'type': 'object', @@ -11959,23 +11821,12 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". - - .. note:: - ``create_tool_call`` may also be used as a factory to create a - ``ToolCall``. Benefits include: - - * Automatic ID generation (when not provided) - * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), - 'extras': dict({ - 'title': 'Extras', - 'type': 'object', - }), 'id': dict({ 'anyOf': list([ dict({ @@ -11987,17 +11838,6 @@ ]), 'title': 'Id', }), - 'index': dict({ - 'anyOf': list([ - dict({ - 'type': 'integer', - }), - dict({ - 'type': 'string', - }), - ]), - 'title': 'Index', - }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -12008,10 +11848,9 @@ }), }), 'required': list([ - 'type', - 'id', 'name', 'args', + 'id', ]), 'title': 'ToolCall', 'type': 'object', @@ -13455,23 +13294,12 @@ This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". - - .. note:: - ``create_tool_call`` may also be used as a factory to create a - ``ToolCall``. 
Benefits include: - - * Automatic ID generation (when not provided) - * Required arguments strictly validated at creation time ''', 'properties': dict({ 'args': dict({ 'title': 'Args', 'type': 'object', }), - 'extras': dict({ - 'title': 'Extras', - 'type': 'object', - }), 'id': dict({ 'anyOf': list([ dict({ @@ -13483,17 +13311,6 @@ ]), 'title': 'Id', }), - 'index': dict({ - 'anyOf': list([ - dict({ - 'type': 'integer', - }), - dict({ - 'type': 'string', - }), - ]), - 'title': 'Index', - }), 'name': dict({ 'title': 'Name', 'type': 'string', @@ -13504,10 +13321,9 @@ }), }), 'required': list([ - 'type', - 'id', 'name', 'args', + 'id', ]), 'title': 'ToolCall', 'type': 'object', diff --git a/libs/partners/anthropic/langchain_anthropic/chat_models.py b/libs/partners/anthropic/langchain_anthropic/chat_models.py index b038ba09ff5d6..18651fb5f2c03 100644 --- a/libs/partners/anthropic/langchain_anthropic/chat_models.py +++ b/libs/partners/anthropic/langchain_anthropic/chat_models.py @@ -1501,11 +1501,20 @@ def _get_request_payload( isinstance(message, AIMessage) and message.response_metadata.get("output_version") == "v1" ): + tcs: list[types.ToolCall] = [ + { + "type": "tool_call", + "name": tool_call["name"], + "args": tool_call["args"], + "id": tool_call.get("id"), + } + for tool_call in message.tool_calls + ] messages[idx] = message.model_copy( update={ "content": _convert_from_v1_to_anthropic( cast(list[types.ContentBlock], message.content), - message.tool_calls, + tcs, message.response_metadata.get("model_provider"), ) } diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index 4db93be82b214..47ef23801f9d9 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -64,6 +64,7 @@ convert_to_openai_data_block, is_data_content_block, ) +from langchain_core.messages import content as types from langchain_core.messages.ai import ( InputTokenDetails, OutputTokenDetails, @@ -3748,9 +3749,16 @@ def _construct_responses_api_input(messages: Sequence[BaseMessage]) -> list: if isinstance(msg.get("content"), list) and all( isinstance(block, dict) for block in msg["content"] ): - msg["content"] = _convert_from_v1_to_responses( - msg["content"], lc_msg.tool_calls - ) + tcs: list[types.ToolCall] = [ + { + "type": "tool_call", + "name": tool_call["name"], + "args": tool_call["args"], + "id": tool_call.get("id"), + } + for tool_call in lc_msg.tool_calls + ] + msg["content"] = _convert_from_v1_to_responses(msg["content"], tcs) else: msg = _convert_message_to_dict(lc_msg) # Get content from non-standard content blocks diff --git a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py index c68c86cef705b..083f06cc3d1b7 100644 --- a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py +++ b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py @@ -20,6 +20,7 @@ ToolCall, ToolMessage, ) +from langchain_core.messages import content as types from langchain_core.messages.ai import UsageMetadata from langchain_core.outputs import ChatGeneration, ChatResult from langchain_core.runnables import RunnableLambda @@ -2575,9 +2576,16 @@ def test_convert_from_v1_to_chat_completions( def test_convert_from_v1_to_responses( message_v1: AIMessage, expected: list[dict[str, Any]] ) -> None: - result = _convert_from_v1_to_responses( - message_v1.content_blocks, message_v1.tool_calls - ) + 
tcs: list[types.ToolCall] = [ + { + "type": "tool_call", + "name": tool_call["name"], + "args": tool_call["args"], + "id": tool_call.get("id"), + } + for tool_call in message_v1.tool_calls + ] + result = _convert_from_v1_to_responses(message_v1.content_blocks, tcs) assert result == expected # Check no mutation From 4e0fd330aa8462284e35a4c457814461a42be61f Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Mon, 25 Aug 2025 14:10:21 -0400 Subject: [PATCH 46/73] fix: update `content_blocks` property docstring --- libs/core/langchain_core/messages/base.py | 26 ++++++++++++++++------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/libs/core/langchain_core/messages/base.py b/libs/core/langchain_core/messages/base.py index 89008c8c42998..68d81fcb8b6dd 100644 --- a/libs/core/langchain_core/messages/base.py +++ b/libs/core/langchain_core/messages/base.py @@ -108,19 +108,29 @@ def get_lc_namespace(cls) -> list[str]: @property def content_blocks(self) -> list[types.ContentBlock]: - """Return the content as a list of standard ``ContentBlock``s. + r"""Return ``content`` as a list of standardized :class:`~langchain_core.messages.content.ContentBlock`\s. - To use this property, the corresponding chat model must support - ``message_version='v1'`` or higher: + .. important:: - .. code-block:: python + To use this property correctly, the corresponding ``ChatModel`` must support + ``message_version='v1'`` or higher (and it must be set): - from langchain.chat_models import init_chat_model - llm = init_chat_model("...", message_version="v1") + .. code-block:: python - Otherwise, does best-effort parsing to standard types. + from langchain.chat_models import init_chat_model + llm = init_chat_model("...", message_version="v1") - """ + # or + + from langchain_openai import ChatOpenAI + llm = ChatOpenAI(model="gpt-4o", message_version="v1") + + Otherwise, the property will perform best-effort parsing to standard types, + though some content may be misinterpreted. + + .. versionadded:: 1.0.0 + + """ # noqa: E501 from langchain_core.messages import content as types from langchain_core.messages.block_translators.anthropic import ( _convert_to_v1_from_anthropic_input, From 5ef18e8f5bb99c43fd1165cbe681497e15d507d0 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Mon, 25 Aug 2025 14:51:40 -0400 Subject: [PATCH 47/73] feat(core): add `.text` property, introduce `TextAccessor` for backward compatibility with text access methods (#32672) --- libs/core/langchain_core/messages/base.py | 84 +++++++++++++++++---- libs/core/tests/unit_tests/test_messages.py | 46 +++++++++++ 2 files changed, 116 insertions(+), 14 deletions(-) diff --git a/libs/core/langchain_core/messages/base.py b/libs/core/langchain_core/messages/base.py index 68d81fcb8b6dd..384540d9f332e 100644 --- a/libs/core/langchain_core/messages/base.py +++ b/libs/core/langchain_core/messages/base.py @@ -5,7 +5,9 @@ from typing import TYPE_CHECKING, Any, Optional, Union, cast, overload from pydantic import ConfigDict, Field +from typing_extensions import Self +from langchain_core._api.deprecation import warn_deprecated from langchain_core.load.serializable import Serializable from langchain_core.messages import content as types from langchain_core.utils import get_bolded_text @@ -18,6 +20,52 @@ from langchain_core.prompts.chat import ChatPromptTemplate +class TextAccessor(str): + """String-like object that supports both property and method access patterns.
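A minimal usage sketch of the access pattern the ``TextAccessor`` introduced here preserves, assuming ``langchain_core`` at this revision (the message content shape mirrors the tests added later in this patch):

    from langchain_core.messages import AIMessage

    msg = AIMessage(
        content=[
            {"type": "text", "text": "The answer is "},
            {"type": "text", "text": "42"},
        ]
    )

    # Preferred going forward: property access returns a string-like TextAccessor.
    assert msg.text == "The answer is 42"

    # Legacy call-style access still works but emits a deprecation warning
    # and is slated for removal in 2.0.0.
    assert msg.text() == "The answer is 42"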
+ + Exists to maintain backward compatibility while transitioning from method-based to + property-based text access in message objects. In LangChain Self: + """Create new TextAccessor instance.""" + return str.__new__(cls, value) + + def __call__(self) -> str: + """Enable method-style text access for backward compatibility. + + This method exists solely to support legacy code that calls ``.text()`` + as a method. New code should use property access (``.text``) instead. + + .. deprecated:: 1.0.0 + Calling ``.text()`` as a method is deprecated. Use ``.text`` as a property + instead. This method will be removed in 2.0.0. + + Returns: + The string content, identical to property access. + + """ + warn_deprecated( + since="1.0.0", + message=( + "Calling .text() as a method is deprecated. " + "Use .text as a property instead (e.g., message.text)." + ), + removal="2.0.0", + ) + return str(self) + + class BaseMessage(Serializable): """Base abstract message class. @@ -169,25 +217,33 @@ def content_blocks(self) -> list[types.ContentBlock]: blocks = parsing_step(blocks) return blocks - def text(self) -> str: - """Get the text content of the message. + @property + def text(self) -> TextAccessor: + """Get the text content of the message as a string. + + Can be used as both property (``message.text``) and method (``message.text()``). + + .. deprecated:: 1.0.0 + Calling ``.text()`` as a method is deprecated. Use ``.text`` as a property + instead. This method will be removed in 2.0.0. Returns: The text content of the message. """ if isinstance(self.content, str): - return self.content - - # must be a list - blocks = [ - block - for block in self.content - if isinstance(block, str) - or (block.get("type") == "text" and isinstance(block.get("text"), str)) - ] - return "".join( - block if isinstance(block, str) else block["text"] for block in blocks - ) + text_value = self.content + else: + # must be a list + blocks = [ + block + for block in self.content + if isinstance(block, str) + or (block.get("type") == "text" and isinstance(block.get("text"), str)) + ] + text_value = "".join( + block if isinstance(block, str) else block["text"] for block in blocks + ) + return TextAccessor(text_value) def __add__(self, other: Any) -> ChatPromptTemplate: """Concatenate this message with another message.""" diff --git a/libs/core/tests/unit_tests/test_messages.py b/libs/core/tests/unit_tests/test_messages.py index 3de287a287c1e..d00e8af3496ac 100644 --- a/libs/core/tests/unit_tests/test_messages.py +++ b/libs/core/tests/unit_tests/test_messages.py @@ -1306,3 +1306,49 @@ def test_typed_init() -> None: content_blocks=[{"type": "text", "bad": "Hello"}], # type: ignore[list-item] tool_call_id="abc123", ) + + +def test_text_accessor() -> None: + """Test that `message.text` property and `.text()` method return the same value.""" + human_msg = HumanMessage(content="Hello world") + assert human_msg.text == "Hello world" + assert human_msg.text() == "Hello world" + assert str(human_msg.text) == str(human_msg.text()) + + system_msg = SystemMessage(content="You are a helpful assistant") + assert system_msg.text == "You are a helpful assistant" + assert system_msg.text() == "You are a helpful assistant" + assert str(system_msg.text) == str(system_msg.text()) + + ai_msg = AIMessage(content="I can help you with that") + assert ai_msg.text == "I can help you with that" + assert ai_msg.text() == "I can help you with that" + assert str(ai_msg.text) == str(ai_msg.text()) + + tool_msg = ToolMessage(content="Task completed", 
tool_call_id="tool_1") + assert tool_msg.text == "Task completed" + assert tool_msg.text() == "Task completed" + assert str(tool_msg.text) == str(tool_msg.text()) + + complex_msg = HumanMessage( + content=[{"type": "text", "text": "Hello "}, {"type": "text", "text": "world"}] + ) + assert complex_msg.text == "Hello world" + assert complex_msg.text() == "Hello world" + assert str(complex_msg.text) == str(complex_msg.text()) + + mixed_msg = AIMessage( + content=[ + {"type": "text", "text": "The answer is "}, + {"type": "tool_use", "name": "calculate", "input": {"x": 2}, "id": "1"}, + {"type": "text", "text": "42"}, + ] + ) + assert mixed_msg.text == "The answer is 42" + assert mixed_msg.text() == "The answer is 42" + assert str(mixed_msg.text) == str(mixed_msg.text()) + + empty_msg = HumanMessage(content=[]) + assert empty_msg.text == "" + assert empty_msg.text() == "" + assert str(empty_msg.text) == str(empty_msg.text()) From 97bd2cf938fdde6cc4604c4bc880cffd0af77468 Mon Sep 17 00:00:00 2001 From: ccurme Date: Mon, 25 Aug 2025 16:15:03 -0300 Subject: [PATCH 48/73] fix(core): (v1) fix PDF input translation for openai chat completions (#32673) --- .../messages/block_translators/langchain_v0.py | 6 ++++-- .../unit_tests/language_models/chat_models/test_base.py | 2 +- .../unit_tests/messages/block_translators/test_openai.py | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/libs/core/langchain_core/messages/block_translators/langchain_v0.py b/libs/core/langchain_core/messages/block_translators/langchain_v0.py index 5fde4c0fcb0d4..bd7c7e89aa0c0 100644 --- a/libs/core/langchain_core/messages/block_translators/langchain_v0.py +++ b/libs/core/langchain_core/messages/block_translators/langchain_v0.py @@ -272,7 +272,9 @@ def _extract_extras(block_dict: dict, known_keys: set[str]) -> dict[str, Any]: ) # base64-style file block - if block["type"] == "file": + if (block["type"] == "file") and ( + parsed := _parse_data_uri(block["file"]["file_data"]) + ): known_keys = {"type", "file"} extras = _extract_extras(block, known_keys) @@ -285,7 +287,7 @@ def _extract_extras(block_dict: dict, known_keys: set[str]) -> dict[str, Any]: filename = block["file"].get("filename") return types.create_file_block( - base64=block["file"]["file_data"], + base64=parsed["data"], mime_type="application/pdf", filename=filename, **all_extras, diff --git a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py index 22d8bc7907f5e..93ba9b218d187 100644 --- a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py +++ b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py @@ -628,7 +628,7 @@ def test_extend_support_to_openai_multimodal_formats() -> None: "type": "file", "file": { "filename": "draconomicon.pdf", - "file_data": "", + "file_data": "data:application/pdf;base64,", }, }, { # file-id diff --git a/libs/core/tests/unit_tests/messages/block_translators/test_openai.py b/libs/core/tests/unit_tests/messages/block_translators/test_openai.py index 2ed2086ea4443..924252dae2b49 100644 --- a/libs/core/tests/unit_tests/messages/block_translators/test_openai.py +++ b/libs/core/tests/unit_tests/messages/block_translators/test_openai.py @@ -257,7 +257,7 @@ def test_convert_to_v1_from_openai_input() -> None: "type": "file", "file": { "filename": "draconomicon.pdf", - "file_data": "", + "file_data": "data:application/pdf;base64,", }, }, { From fe9599f118faf0861e618f23c994724d54a952ac Mon Sep 17 00:00:00 2001 From: 
ccurme Date: Mon, 25 Aug 2025 16:38:22 -0300 Subject: [PATCH 49/73] feat(core): parse `tool_call_chunks` in content in aggregated stream (#32664) --- .../language_models/chat_models.py | 79 ++++++++++++++- .../language_models/fake_chat_models.py | 24 ++++- libs/core/langchain_core/messages/ai.py | 44 ++++++++- .../messages/block_translators/anthropic.py | 51 ++++++---- .../messages/block_translators/openai.py | 33 +++++-- libs/core/langchain_core/messages/utils.py | 10 +- .../unit_tests/fake/test_fake_chat_model.py | 14 ++- .../language_models/chat_models/test_base.py | 26 +++-- .../core/tests/unit_tests/messages/test_ai.py | 46 +++++++++ .../prompts/__snapshots__/test_chat.ambr | 26 +++++ .../runnables/__snapshots__/test_graph.ambr | 13 +++ .../__snapshots__/test_runnable.ambr | 96 ++++++++++++++++++ .../unit_tests/runnables/test_runnable.py | 4 +- .../runnables/test_runnable_events_v1.py | 97 ++++++++++++++++--- .../runnables/test_runnable_events_v2.py | 93 +++++++++++++++--- .../tests/unit_tests/agents/test_agent.py | 7 ++ .../tests/unit_tests/llms/fake_chat_model.py | 8 +- .../unit_tests/llms/test_fake_chat_model.py | 11 ++- .../langchain_anthropic/chat_models.py | 2 + .../integration_tests/test_chat_models.py | 2 +- .../langchain_openai/chat_models/base.py | 7 +- .../chat_models/test_base.py | 9 +- .../chat_models/test_responses_api.py | 5 + 23 files changed, 612 insertions(+), 95 deletions(-) diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py index 10c7de268c078..c08d73c33954e 100644 --- a/libs/core/langchain_core/language_models/chat_models.py +++ b/libs/core/langchain_core/language_models/chat_models.py @@ -556,6 +556,7 @@ def stream( try: input_messages = _normalize_messages(messages) run_id = "-".join((LC_ID_PREFIX, str(run_manager.run_id))) + yielded = False for chunk in self._stream(input_messages, stop=stop, **kwargs): if chunk.message.id is None: chunk.message.id = run_id @@ -570,6 +571,21 @@ def stream( ) chunks.append(chunk) yield cast("AIMessageChunk", chunk.message) + yielded = True + + # Yield a final empty chunk with chunk_position="last" if not yet + # yielded + if ( + yielded + and isinstance(chunk.message, AIMessageChunk) + and not chunk.message.chunk_position + ): + empty_content: Union[str, list] = ( + "" if isinstance(chunk.message.content, str) else [] + ) + yield AIMessageChunk( + content=empty_content, chunk_position="last", id=run_id + ) except BaseException as e: generations_with_error_metadata = _generate_response_from_error(e) chat_generation_chunk = merge_chat_generation_chunks(chunks) @@ -654,6 +670,7 @@ async def astream( try: input_messages = _normalize_messages(messages) run_id = "-".join((LC_ID_PREFIX, str(run_manager.run_id))) + yielded = False async for chunk in self._astream( input_messages, stop=stop, @@ -672,6 +689,20 @@ async def astream( ) chunks.append(chunk) yield cast("AIMessageChunk", chunk.message) + yielded = True + + # Yield a final empty chunk with chunk_position="last" if not yet yielded + if ( + yielded + and isinstance(chunk.message, AIMessageChunk) + and not chunk.message.chunk_position + ): + empty_content: Union[str, list] = ( + "" if isinstance(chunk.message.content, str) else [] + ) + yield AIMessageChunk( + content=empty_content, chunk_position="last", id=run_id + ) except BaseException as e: generations_with_error_metadata = _generate_response_from_error(e) chat_generation_chunk = merge_chat_generation_chunks(chunks) @@ -1120,11 +1151,15 @@ def 
_generate_with_cache( **kwargs, ): chunks: list[ChatGenerationChunk] = [] + run_id: Optional[str] = ( + f"{LC_ID_PREFIX}-{run_manager.run_id}" if run_manager else None + ) + yielded = False for chunk in self._stream(messages, stop=stop, **kwargs): chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk) if run_manager: if chunk.message.id is None: - chunk.message.id = f"{LC_ID_PREFIX}-{run_manager.run_id}" + chunk.message.id = run_id if self.output_version == "v1": # Overwrite .content with .content_blocks chunk.message = _update_message_content_to_blocks( @@ -1134,6 +1169,24 @@ def _generate_with_cache( cast("str", chunk.message.content), chunk=chunk ) chunks.append(chunk) + yielded = True + + # Yield a final empty chunk with chunk_position="last" if not yet yielded + if ( + yielded + and isinstance(chunk.message, AIMessageChunk) + and not chunk.message.chunk_position + ): + empty_content: Union[str, list] = ( + "" if isinstance(chunk.message.content, str) else [] + ) + chunks.append( + ChatGenerationChunk( + message=AIMessageChunk( + content=empty_content, chunk_position="last", id=run_id + ) + ) + ) result = generate_from_stream(iter(chunks)) elif inspect.signature(self._generate).parameters.get("run_manager"): result = self._generate( @@ -1205,11 +1258,15 @@ async def _agenerate_with_cache( **kwargs, ): chunks: list[ChatGenerationChunk] = [] + run_id: Optional[str] = ( + f"{LC_ID_PREFIX}-{run_manager.run_id}" if run_manager else None + ) + yielded = False async for chunk in self._astream(messages, stop=stop, **kwargs): chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk) if run_manager: if chunk.message.id is None: - chunk.message.id = f"{LC_ID_PREFIX}-{run_manager.run_id}" + chunk.message.id = run_id if self.output_version == "v1": # Overwrite .content with .content_blocks chunk.message = _update_message_content_to_blocks( @@ -1219,6 +1276,24 @@ async def _agenerate_with_cache( cast("str", chunk.message.content), chunk=chunk ) chunks.append(chunk) + yielded = True + + # Yield a final empty chunk with chunk_position="last" if not yet yielded + if ( + yielded + and isinstance(chunk.message, AIMessageChunk) + and not chunk.message.chunk_position + ): + empty_content: Union[str, list] = ( + "" if isinstance(chunk.message.content, str) else [] + ) + chunks.append( + ChatGenerationChunk( + message=AIMessageChunk( + content=empty_content, chunk_position="last", id=run_id + ) + ) + ) result = generate_from_stream(iter(chunks)) elif inspect.signature(self._agenerate).parameters.get("run_manager"): result = await self._agenerate( diff --git a/libs/core/langchain_core/language_models/fake_chat_models.py b/libs/core/langchain_core/language_models/fake_chat_models.py index 3430db60f2d43..17306dd17d438 100644 --- a/libs/core/langchain_core/language_models/fake_chat_models.py +++ b/libs/core/langchain_core/language_models/fake_chat_models.py @@ -4,7 +4,7 @@ import re import time from collections.abc import AsyncIterator, Iterator -from typing import Any, Optional, Union, cast +from typing import Any, Literal, Optional, Union, cast from typing_extensions import override @@ -112,7 +112,12 @@ def _stream( ): raise FakeListChatModelError - yield ChatGenerationChunk(message=AIMessageChunk(content=c)) + chunk_position: Optional[Literal["last"]] = ( + "last" if i_c == len(response) - 1 else None + ) + yield ChatGenerationChunk( + message=AIMessageChunk(content=c, chunk_position=chunk_position) + ) @override async def _astream( @@ -135,7 +140,12 @@ async def _astream( and i_c == 
self.error_on_chunk_number ): raise FakeListChatModelError - yield ChatGenerationChunk(message=AIMessageChunk(content=c)) + chunk_position: Optional[Literal["last"]] = ( + "last" if i_c == len(response) - 1 else None + ) + yield ChatGenerationChunk( + message=AIMessageChunk(content=c, chunk_position=chunk_position) + ) @property @override @@ -283,10 +293,16 @@ def _stream( content_chunks = cast("list[str]", re.split(r"(\s)", content)) - for token in content_chunks: + for idx, token in enumerate(content_chunks): chunk = ChatGenerationChunk( message=AIMessageChunk(content=token, id=message.id) ) + if ( + idx == len(content_chunks) - 1 + and isinstance(chunk.message, AIMessageChunk) + and not message.additional_kwargs + ): + chunk.message.chunk_position = "last" if run_manager: run_manager.on_llm_new_token(token, chunk=chunk) yield chunk diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index cc0e532f7c897..d6ab74e2df97b 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -363,6 +363,13 @@ class AIMessageChunk(AIMessage, BaseMessageChunk): tool_call_chunks: list[ToolCallChunk] = [] """If provided, tool call chunks associated with the message.""" + chunk_position: Optional[Literal["last"]] = None + """Optional span represented by an aggregated AIMessageChunk. + + If a chunk with ``chunk_position="last"`` is aggregated into a stream, + ``tool_call_chunks`` in message content will be parsed into ``tool_calls``. + """ + @property def lc_attributes(self) -> dict: """Attrs to be serialized even if they are derived from other init args.""" @@ -391,7 +398,11 @@ def content_blocks(self) -> list[types.ContentBlock]: # Otherwise, use best-effort parsing blocks = super().content_blocks - if self.tool_call_chunks and not self.content: + if ( + self.tool_call_chunks + and not self.content + and self.chunk_position != "last" # keep tool_calls if aggregated + ): blocks = [ block for block in blocks @@ -480,6 +491,32 @@ def add_chunk_to_invalid_tool_calls(chunk: ToolCallChunk) -> None: add_chunk_to_invalid_tool_calls(chunk) self.tool_calls = tool_calls self.invalid_tool_calls = invalid_tool_calls + + if ( + self.chunk_position == "last" + and self.tool_call_chunks + and self.response_metadata.get("output_version") == "v1" + and isinstance(self.content, list) + ): + id_to_tc: dict[str, types.ToolCall] = { + cast("str", tc.get("id")): { + "type": "tool_call", + "name": tc["name"], + "args": tc["args"], + "id": tc.get("id"), + } + for tc in self.tool_calls + if "id" in tc + } + for idx, block in enumerate(self.content): + if ( + isinstance(block, dict) + and block.get("type") == "tool_call_chunk" + and (call_id := block.get("id")) + and call_id in id_to_tc + ): + self.content[idx] = cast("dict[str, Any]", id_to_tc[call_id]) + return self @overload # type: ignore[override] # summing BaseMessages gives ChatPromptTemplate @@ -566,6 +603,10 @@ def add_ai_message_chunks( chunk_id = id_ break + chunk_position: Optional[Literal["last"]] = ( + "last" if any(x.chunk_position == "last" for x in [left, *others]) else None + ) + return left.__class__( example=left.example, content=content, @@ -574,6 +615,7 @@ def add_ai_message_chunks( response_metadata=response_metadata, usage_metadata=usage_metadata, id=chunk_id, + chunk_position=chunk_position, ) diff --git a/libs/core/langchain_core/messages/block_translators/anthropic.py b/libs/core/langchain_core/messages/block_translators/anthropic.py index 655cfaa2857ef..1e11386598184 
100644 --- a/libs/core/langchain_core/messages/block_translators/anthropic.py +++ b/libs/core/langchain_core/messages/block_translators/anthropic.py @@ -2,7 +2,7 @@ import json from collections.abc import Iterable -from typing import Any, cast +from typing import Any, Optional, cast from langchain_core.messages import AIMessage, AIMessageChunk from langchain_core.messages import content as types @@ -226,34 +226,47 @@ def _iter_blocks() -> Iterable[types.ContentBlock]: if ( isinstance(message, AIMessageChunk) and len(message.tool_call_chunks) == 1 + and message.chunk_position != "last" ): + # Isolated chunk tool_call_chunk: types.ToolCallChunk = ( message.tool_call_chunks[0].copy() # type: ignore[assignment] ) if "type" not in tool_call_chunk: tool_call_chunk["type"] = "tool_call_chunk" yield tool_call_chunk - elif ( - not isinstance(message, AIMessageChunk) - and len(message.tool_calls) == 1 - ): - tool_call_block: types.ToolCall = { - "type": "tool_call", - "name": message.tool_calls[0]["name"], - "args": message.tool_calls[0]["args"], - "id": message.tool_calls[0].get("id"), - } + else: + tool_call_block: Optional[types.ToolCall] = None + # Non-streaming or gathered chunk + if len(message.tool_calls) == 1: + tool_call_block = { + "type": "tool_call", + "name": message.tool_calls[0]["name"], + "args": message.tool_calls[0]["args"], + "id": message.tool_calls[0].get("id"), + } + elif call_id := block.get("id"): + for tc in message.tool_calls: + if tc.get("id") == call_id: + tool_call_block = { + "type": "tool_call", + "name": tc["name"], + "args": tc["args"], + "id": tc.get("id"), + } + break + else: + pass + if not tool_call_block: + tool_call_block = { + "type": "tool_call", + "name": block.get("name", ""), + "args": block.get("input", {}), + "id": block.get("id", ""), + } if "index" in block: tool_call_block["index"] = block["index"] yield tool_call_block - else: - tool_call_block = { - "type": "tool_call", - "name": block.get("name", ""), - "args": block.get("input", {}), - "id": block.get("id", ""), - } - yield tool_call_block elif ( block_type == "input_json_delta" diff --git a/libs/core/langchain_core/messages/block_translators/openai.py b/libs/core/langchain_core/messages/block_translators/openai.py index a9e5db98d9d54..10bf18c0a6fbf 100644 --- a/libs/core/langchain_core/messages/block_translators/openai.py +++ b/libs/core/langchain_core/messages/block_translators/openai.py @@ -99,16 +99,28 @@ def _convert_to_v1_from_chat_completions_chunk( else: content_blocks = [] - for tool_call_chunk in chunk.tool_call_chunks: - tc: types.ToolCallChunk = { - "type": "tool_call_chunk", - "id": tool_call_chunk.get("id"), - "name": tool_call_chunk.get("name"), - "args": tool_call_chunk.get("args"), - } - if (idx := tool_call_chunk.get("index")) is not None: - tc["index"] = idx - content_blocks.append(tc) + if chunk.chunk_position == "last": + for tool_call in chunk.tool_calls: + content_blocks.append( + { + "type": "tool_call", + "name": tool_call["name"], + "args": tool_call["args"], + "id": tool_call.get("id"), + } + ) + + else: + for tool_call_chunk in chunk.tool_call_chunks: + tc: types.ToolCallChunk = { + "type": "tool_call_chunk", + "id": tool_call_chunk.get("id"), + "name": tool_call_chunk.get("name"), + "args": tool_call_chunk.get("args"), + } + if (idx := tool_call_chunk.get("index")) is not None: + tc["index"] = idx + content_blocks.append(tc) return content_blocks @@ -289,6 +301,7 @@ def _iter_blocks() -> Iterable[types.ContentBlock]: if ( isinstance(message, AIMessageChunk) and 
len(message.tool_call_chunks) == 1 + and message.chunk_position != "last" ): tool_call_block = message.tool_call_chunks[0].copy() # type: ignore[assignment] elif call_id: diff --git a/libs/core/langchain_core/messages/utils.py b/libs/core/langchain_core/messages/utils.py index 79c1c4b66c6c9..282f05a89fbe0 100644 --- a/libs/core/langchain_core/messages/utils.py +++ b/libs/core/langchain_core/messages/utils.py @@ -199,7 +199,7 @@ def message_chunk_to_message(chunk: BaseMessageChunk) -> BaseMessage: # chunk classes always have the equivalent non-chunk class as their first parent ignore_keys = ["type"] if isinstance(chunk, AIMessageChunk): - ignore_keys.append("tool_call_chunks") + ignore_keys.extend(["tool_call_chunks", "chunk_position"]) return chunk.__class__.__mro__[1]( **{k: v for k, v in chunk.__dict__.items() if k not in ignore_keys} ) @@ -1501,11 +1501,15 @@ def _msg_to_chunk(message: BaseMessage) -> BaseMessageChunk: def _chunk_to_msg(chunk: BaseMessageChunk) -> BaseMessage: if chunk.__class__ in _CHUNK_MSG_MAP: return _CHUNK_MSG_MAP[chunk.__class__]( - **chunk.model_dump(exclude={"type", "tool_call_chunks"}) + **chunk.model_dump(exclude={"type", "tool_call_chunks", "chunk_position"}) ) for chunk_cls, msg_cls in _CHUNK_MSG_MAP.items(): if isinstance(chunk, chunk_cls): - return msg_cls(**chunk.model_dump(exclude={"type", "tool_call_chunks"})) + return msg_cls( + **chunk.model_dump( + exclude={"type", "tool_call_chunks", "chunk_position"} + ) + ) msg = ( f"Unrecognized message chunk class {chunk.__class__}. Supported classes are " diff --git a/libs/core/tests/unit_tests/fake/test_fake_chat_model.py b/libs/core/tests/unit_tests/fake/test_fake_chat_model.py index 0b8bf7a81a469..0e1944534d07d 100644 --- a/libs/core/tests/unit_tests/fake/test_fake_chat_model.py +++ b/libs/core/tests/unit_tests/fake/test_fake_chat_model.py @@ -59,7 +59,7 @@ async def test_generic_fake_chat_model_stream() -> None: assert chunks == [ _any_id_ai_message_chunk(content="hello"), _any_id_ai_message_chunk(content=" "), - _any_id_ai_message_chunk(content="goodbye"), + _any_id_ai_message_chunk(content="goodbye", chunk_position="last"), ] assert len({chunk.id for chunk in chunks}) == 1 @@ -67,7 +67,7 @@ async def test_generic_fake_chat_model_stream() -> None: assert chunks == [ _any_id_ai_message_chunk(content="hello"), _any_id_ai_message_chunk(content=" "), - _any_id_ai_message_chunk(content="goodbye"), + _any_id_ai_message_chunk(content="goodbye", chunk_position="last"), ] assert len({chunk.id for chunk in chunks}) == 1 @@ -79,6 +79,7 @@ async def test_generic_fake_chat_model_stream() -> None: assert chunks == [ _any_id_ai_message_chunk(content="", additional_kwargs={"foo": 42}), _any_id_ai_message_chunk(content="", additional_kwargs={"bar": 24}), + _any_id_ai_message_chunk(content="", chunk_position="last"), ] assert len({chunk.id for chunk in chunks}) == 1 @@ -97,7 +98,8 @@ async def test_generic_fake_chat_model_stream() -> None: assert chunks == [ _any_id_ai_message_chunk( - content="", additional_kwargs={"function_call": {"name": "move_file"}} + content="", + additional_kwargs={"function_call": {"name": "move_file"}}, ), _any_id_ai_message_chunk( content="", @@ -114,6 +116,7 @@ async def test_generic_fake_chat_model_stream() -> None: "function_call": {"arguments": '\n "destination_path": "bar"\n}'}, }, ), + _any_id_ai_message_chunk(content="", chunk_position="last"), ] assert len({chunk.id for chunk in chunks}) == 1 @@ -134,6 +137,7 @@ async def test_generic_fake_chat_model_stream() -> None: } }, 
id=chunks[0].id, + chunk_position="last", ) @@ -148,7 +152,7 @@ async def test_generic_fake_chat_model_astream_log() -> None: assert final.state["streamed_output"] == [ _any_id_ai_message_chunk(content="hello"), _any_id_ai_message_chunk(content=" "), - _any_id_ai_message_chunk(content="goodbye"), + _any_id_ai_message_chunk(content="goodbye", chunk_position="last"), ] assert len({chunk.id for chunk in final.state["streamed_output"]}) == 1 @@ -205,7 +209,7 @@ async def on_llm_new_token( assert results == [ _any_id_ai_message_chunk(content="hello"), _any_id_ai_message_chunk(content=" "), - _any_id_ai_message_chunk(content="goodbye"), + _any_id_ai_message_chunk(content="goodbye", chunk_position="last"), ] assert tokens == ["hello", " ", "goodbye"] assert len({chunk.id for chunk in results}) == 1 diff --git a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py index 93ba9b218d187..1176d683f1130 100644 --- a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py +++ b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py @@ -233,7 +233,9 @@ def _stream( ) -> Iterator[ChatGenerationChunk]: """Stream the output of the model.""" yield ChatGenerationChunk(message=AIMessageChunk(content="a")) - yield ChatGenerationChunk(message=AIMessageChunk(content="b")) + yield ChatGenerationChunk( + message=AIMessageChunk(content="b", chunk_position="last") + ) @property def _llm_type(self) -> str: @@ -242,15 +244,19 @@ def _llm_type(self) -> str: model = ModelWithSyncStream() chunks = list(model.stream("anything")) assert chunks == [ - _any_id_ai_message_chunk(content="a"), - _any_id_ai_message_chunk(content="b"), + _any_id_ai_message_chunk( + content="a", + ), + _any_id_ai_message_chunk(content="b", chunk_position="last"), ] assert len({chunk.id for chunk in chunks}) == 1 assert type(model)._astream == BaseChatModel._astream astream_chunks = [chunk async for chunk in model.astream("anything")] assert astream_chunks == [ - _any_id_ai_message_chunk(content="a"), - _any_id_ai_message_chunk(content="b"), + _any_id_ai_message_chunk( + content="a", + ), + _any_id_ai_message_chunk(content="b", chunk_position="last"), ] assert len({chunk.id for chunk in astream_chunks}) == 1 @@ -279,7 +285,9 @@ async def _astream( ) -> AsyncIterator[ChatGenerationChunk]: """Stream the output of the model.""" yield ChatGenerationChunk(message=AIMessageChunk(content="a")) - yield ChatGenerationChunk(message=AIMessageChunk(content="b")) + yield ChatGenerationChunk( + message=AIMessageChunk(content="b", chunk_position="last") + ) @property def _llm_type(self) -> str: @@ -288,8 +296,10 @@ def _llm_type(self) -> str: model = ModelWithAsyncStream() chunks = [chunk async for chunk in model.astream("anything")] assert chunks == [ - _any_id_ai_message_chunk(content="a"), - _any_id_ai_message_chunk(content="b"), + _any_id_ai_message_chunk( + content="a", + ), + _any_id_ai_message_chunk(content="b", chunk_position="last"), ] assert len({chunk.id for chunk in chunks}) == 1 diff --git a/libs/core/tests/unit_tests/messages/test_ai.py b/libs/core/tests/unit_tests/messages/test_ai.py index a0edf0b5714bc..1155587cf4b25 100644 --- a/libs/core/tests/unit_tests/messages/test_ai.py +++ b/libs/core/tests/unit_tests/messages/test_ai.py @@ -325,6 +325,52 @@ def test_content_blocks() -> None: ] assert message.content == "" + # Test we parse tool call chunks into tool calls for v1 content + chunk_1 = AIMessageChunk( + content="", + tool_call_chunks=[ 
+ { + "type": "tool_call_chunk", + "name": "foo", + "args": '{"foo": "b', + "id": "abc_123", + "index": 0, + } + ], + ) + + chunk_2 = AIMessageChunk( + content="", + tool_call_chunks=[ + { + "type": "tool_call_chunk", + "name": "", + "args": 'ar"}', + "id": "abc_123", + "index": 0, + } + ], + ) + chunk_3 = AIMessageChunk(content="", chunk_position="last") + chunk = chunk_1 + chunk_2 + chunk_3 + assert chunk.content == "" + assert chunk.content_blocks == chunk.tool_calls + + # test v1 content + chunk_1.content = cast("Union[str, list[Union[str, dict]]]", chunk_1.content_blocks) + chunk_1.response_metadata["output_version"] = "v1" + chunk_2.content = cast("Union[str, list[Union[str, dict]]]", chunk_2.content_blocks) + + chunk = chunk_1 + chunk_2 + chunk_3 + assert chunk.content == [ + { + "type": "tool_call", + "name": "foo", + "args": {"foo": "bar"}, + "id": "abc_123", + } + ] + # Non-standard standard_content_1: list[types.ContentBlock] = [ {"type": "non_standard", "index": 0, "value": {"foo": "bar "}} diff --git a/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr b/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr index 7b28b1523c10d..a3df49bf5d363 100644 --- a/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr +++ b/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr @@ -122,6 +122,19 @@ 'title': 'Additional Kwargs', 'type': 'object', }), + 'chunk_position': dict({ + 'anyOf': list([ + dict({ + 'const': 'last', + 'type': 'string', + }), + dict({ + 'type': 'null', + }), + ]), + 'default': None, + 'title': 'Chunk Position', + }), 'content': dict({ 'anyOf': list([ dict({ @@ -1570,6 +1583,19 @@ 'title': 'Additional Kwargs', 'type': 'object', }), + 'chunk_position': dict({ + 'anyOf': list([ + dict({ + 'const': 'last', + 'type': 'string', + }), + dict({ + 'type': 'null', + }), + ]), + 'default': None, + 'title': 'Chunk Position', + }), 'content': dict({ 'anyOf': list([ dict({ diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr index 3d9e96918bfc5..425fdd00341b5 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr @@ -525,6 +525,19 @@ 'title': 'Additional Kwargs', 'type': 'object', }), + 'chunk_position': dict({ + 'anyOf': list([ + dict({ + 'const': 'last', + 'type': 'string', + }), + dict({ + 'type': 'null', + }), + ]), + 'default': None, + 'title': 'Chunk Position', + }), 'content': dict({ 'anyOf': list([ dict({ diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr index ab07b2c2c0991..b5348ce867d02 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr @@ -2077,6 +2077,18 @@ 'title': 'Additional Kwargs', 'type': 'object', }), + 'chunk_position': dict({ + 'anyOf': list([ + dict({ + 'const': 'last', + }), + dict({ + 'type': 'null', + }), + ]), + 'default': None, + 'title': 'Chunk Position', + }), 'content': dict({ 'anyOf': list([ dict({ @@ -3506,6 +3518,18 @@ 'title': 'Additional Kwargs', 'type': 'object', }), + 'chunk_position': dict({ + 'anyOf': list([ + dict({ + 'const': 'last', + }), + dict({ + 'type': 'null', + }), + ]), + 'default': None, + 'title': 'Chunk Position', + }), 'content': dict({ 'anyOf': list([ dict({ @@ -5029,6 +5053,18 @@ 
'title': 'Additional Kwargs', 'type': 'object', }), + 'chunk_position': dict({ + 'anyOf': list([ + dict({ + 'const': 'last', + }), + dict({ + 'type': 'null', + }), + ]), + 'default': None, + 'title': 'Chunk Position', + }), 'content': dict({ 'anyOf': list([ dict({ @@ -6490,6 +6526,18 @@ 'title': 'Additional Kwargs', 'type': 'object', }), + 'chunk_position': dict({ + 'anyOf': list([ + dict({ + 'const': 'last', + }), + dict({ + 'type': 'null', + }), + ]), + 'default': None, + 'title': 'Chunk Position', + }), 'content': dict({ 'anyOf': list([ dict({ @@ -7961,6 +8009,18 @@ 'title': 'Additional Kwargs', 'type': 'object', }), + 'chunk_position': dict({ + 'anyOf': list([ + dict({ + 'const': 'last', + }), + dict({ + 'type': 'null', + }), + ]), + 'default': None, + 'title': 'Chunk Position', + }), 'content': dict({ 'anyOf': list([ dict({ @@ -9467,6 +9527,18 @@ 'title': 'Additional Kwargs', 'type': 'object', }), + 'chunk_position': dict({ + 'anyOf': list([ + dict({ + 'const': 'last', + }), + dict({ + 'type': 'null', + }), + ]), + 'default': None, + 'title': 'Chunk Position', + }), 'content': dict({ 'anyOf': list([ dict({ @@ -10846,6 +10918,18 @@ 'title': 'Additional Kwargs', 'type': 'object', }), + 'chunk_position': dict({ + 'anyOf': list([ + dict({ + 'const': 'last', + }), + dict({ + 'type': 'null', + }), + ]), + 'default': None, + 'title': 'Chunk Position', + }), 'content': dict({ 'anyOf': list([ dict({ @@ -12330,6 +12414,18 @@ 'title': 'Additional Kwargs', 'type': 'object', }), + 'chunk_position': dict({ + 'anyOf': list([ + dict({ + 'const': 'last', + }), + dict({ + 'type': 'null', + }), + ]), + 'default': None, + 'title': 'Chunk Position', + }), 'content': dict({ 'anyOf': list([ dict({ diff --git a/libs/core/tests/unit_tests/runnables/test_runnable.py b/libs/core/tests/unit_tests/runnables/test_runnable.py index d9a9db349e8ab..65b6ea5d4f9bd 100644 --- a/libs/core/tests/unit_tests/runnables/test_runnable.py +++ b/libs/core/tests/unit_tests/runnables/test_runnable.py @@ -1862,7 +1862,7 @@ def test_prompt_with_chat_model( ] == [ _any_id_ai_message_chunk(content="f"), _any_id_ai_message_chunk(content="o"), - _any_id_ai_message_chunk(content="o"), + _any_id_ai_message_chunk(content="o", chunk_position="last"), ] assert prompt_spy.call_args.args[1] == {"question": "What is your name?"} assert chat_spy.call_args.args[1] == ChatPromptValue( @@ -1971,7 +1971,7 @@ async def test_prompt_with_chat_model_async( ] == [ _any_id_ai_message_chunk(content="f"), _any_id_ai_message_chunk(content="o"), - _any_id_ai_message_chunk(content="o"), + _any_id_ai_message_chunk(content="o", chunk_position="last"), ] assert prompt_spy.call_args.args[1] == {"question": "What is your name?"} assert chat_spy.call_args.args[1] == ChatPromptValue( diff --git a/libs/core/tests/unit_tests/runnables/test_runnable_events_v1.py b/libs/core/tests/unit_tests/runnables/test_runnable_events_v1.py index b41409754a31e..2dc16821f2b53 100644 --- a/libs/core/tests/unit_tests/runnables/test_runnable_events_v1.py +++ b/libs/core/tests/unit_tests/runnables/test_runnable_events_v1.py @@ -503,7 +503,11 @@ async def test_astream_events_from_model() -> None: "tags": ["my_model"], }, { - "data": {"chunk": _any_id_ai_message_chunk(content="hello")}, + "data": { + "chunk": _any_id_ai_message_chunk( + content="hello", + ) + }, "event": "on_chat_model_stream", "metadata": {"a": "b"}, "name": "my_model", @@ -521,7 +525,11 @@ async def test_astream_events_from_model() -> None: "tags": ["my_model"], }, { - "data": {"chunk": 
_any_id_ai_message_chunk(content="world!")}, + "data": { + "chunk": _any_id_ai_message_chunk( + content="world!", chunk_position="last" + ) + }, "event": "on_chat_model_stream", "metadata": {"a": "b"}, "name": "my_model", @@ -530,7 +538,11 @@ async def test_astream_events_from_model() -> None: "tags": ["my_model"], }, { - "data": {"output": _any_id_ai_message_chunk(content="hello world!")}, + "data": { + "output": _any_id_ai_message_chunk( + content="hello world!", chunk_position="last" + ) + }, "event": "on_chat_model_end", "metadata": {"a": "b"}, "name": "my_model", @@ -574,7 +586,11 @@ def i_dont_stream(value: Any, config: RunnableConfig) -> Any: "tags": ["my_model"], }, { - "data": {"chunk": _any_id_ai_message_chunk(content="hello")}, + "data": { + "chunk": _any_id_ai_message_chunk( + content="hello", + ) + }, "event": "on_chat_model_stream", "metadata": { "a": "b", @@ -600,7 +616,11 @@ def i_dont_stream(value: Any, config: RunnableConfig) -> Any: "tags": ["my_model"], }, { - "data": {"chunk": _any_id_ai_message_chunk(content="world!")}, + "data": { + "chunk": _any_id_ai_message_chunk( + content="world!", chunk_position="last" + ) + }, "event": "on_chat_model_stream", "metadata": { "a": "b", @@ -698,7 +718,11 @@ async def ai_dont_stream(value: Any, config: RunnableConfig) -> Any: "tags": ["my_model"], }, { - "data": {"chunk": _any_id_ai_message_chunk(content="hello")}, + "data": { + "chunk": _any_id_ai_message_chunk( + content="hello", + ) + }, "event": "on_chat_model_stream", "metadata": { "a": "b", @@ -724,7 +748,11 @@ async def ai_dont_stream(value: Any, config: RunnableConfig) -> Any: "tags": ["my_model"], }, { - "data": {"chunk": _any_id_ai_message_chunk(content="world!")}, + "data": { + "chunk": _any_id_ai_message_chunk( + content="world!", chunk_position="last" + ) + }, "event": "on_chat_model_stream", "metadata": { "a": "b", @@ -891,7 +919,12 @@ async def test_event_stream_with_simple_chain() -> None: "tags": ["my_chain", "my_model", "seq:step:2"], }, { - "data": {"chunk": AIMessageChunk(content="hello", id="ai1")}, + "data": { + "chunk": AIMessageChunk( + content="hello", + id="ai1", + ) + }, "event": "on_chat_model_stream", "metadata": { "a": "b", @@ -905,7 +938,12 @@ async def test_event_stream_with_simple_chain() -> None: "tags": ["my_chain", "my_model", "seq:step:2"], }, { - "data": {"chunk": AIMessageChunk(content="hello", id="ai1")}, + "data": { + "chunk": AIMessageChunk( + content="hello", + id="ai1", + ) + }, "event": "on_chain_stream", "metadata": {"foo": "bar"}, "name": "my_chain", @@ -937,7 +975,11 @@ async def test_event_stream_with_simple_chain() -> None: "tags": ["my_chain"], }, { - "data": {"chunk": AIMessageChunk(content="world!", id="ai1")}, + "data": { + "chunk": AIMessageChunk( + content="world!", id="ai1", chunk_position="last" + ) + }, "event": "on_chat_model_stream", "metadata": { "a": "b", @@ -951,7 +993,11 @@ async def test_event_stream_with_simple_chain() -> None: "tags": ["my_chain", "my_model", "seq:step:2"], }, { - "data": {"chunk": AIMessageChunk(content="world!", id="ai1")}, + "data": { + "chunk": AIMessageChunk( + content="world!", id="ai1", chunk_position="last" + ) + }, "event": "on_chain_stream", "metadata": {"foo": "bar"}, "name": "my_chain", @@ -975,7 +1021,9 @@ async def test_event_stream_with_simple_chain() -> None: { "generation_info": None, "message": AIMessageChunk( - content="hello world!", id="ai1" + content="hello world!", + id="ai1", + chunk_position="last", ), "text": "hello world!", "type": "ChatGenerationChunk", @@ -1000,7 
+1048,11 @@ async def test_event_stream_with_simple_chain() -> None: "tags": ["my_chain", "my_model", "seq:step:2"], }, { - "data": {"output": AIMessageChunk(content="hello world!", id="ai1")}, + "data": { + "output": AIMessageChunk( + content="hello world!", id="ai1", chunk_position="last" + ) + }, "event": "on_chain_end", "metadata": {"foo": "bar"}, "name": "my_chain", @@ -1851,7 +1903,12 @@ async def test_events_astream_config() -> None: "tags": [], }, { - "data": {"chunk": AIMessageChunk(content="Goodbye", id="ai2")}, + "data": { + "chunk": AIMessageChunk( + content="Goodbye", + id="ai2", + ) + }, "event": "on_chat_model_stream", "metadata": {}, "name": "RunnableConfigurableFields", @@ -1869,7 +1926,11 @@ async def test_events_astream_config() -> None: "tags": [], }, { - "data": {"chunk": AIMessageChunk(content="world", id="ai2")}, + "data": { + "chunk": AIMessageChunk( + content="world", id="ai2", chunk_position="last" + ) + }, "event": "on_chat_model_stream", "metadata": {}, "name": "RunnableConfigurableFields", @@ -1878,7 +1939,11 @@ async def test_events_astream_config() -> None: "tags": [], }, { - "data": {"output": AIMessageChunk(content="Goodbye world", id="ai2")}, + "data": { + "output": AIMessageChunk( + content="Goodbye world", id="ai2", chunk_position="last" + ) + }, "event": "on_chat_model_end", "metadata": {}, "name": "RunnableConfigurableFields", diff --git a/libs/core/tests/unit_tests/runnables/test_runnable_events_v2.py b/libs/core/tests/unit_tests/runnables/test_runnable_events_v2.py index a7731053032c8..536da3665ed9e 100644 --- a/libs/core/tests/unit_tests/runnables/test_runnable_events_v2.py +++ b/libs/core/tests/unit_tests/runnables/test_runnable_events_v2.py @@ -539,7 +539,11 @@ async def test_astream_events_from_model() -> None: "tags": ["my_model"], }, { - "data": {"chunk": _any_id_ai_message_chunk(content="hello")}, + "data": { + "chunk": _any_id_ai_message_chunk( + content="hello", + ) + }, "event": "on_chat_model_stream", "metadata": { "a": "b", @@ -565,7 +569,11 @@ async def test_astream_events_from_model() -> None: "tags": ["my_model"], }, { - "data": {"chunk": _any_id_ai_message_chunk(content="world!")}, + "data": { + "chunk": _any_id_ai_message_chunk( + content="world!", chunk_position="last" + ) + }, "event": "on_chat_model_stream", "metadata": { "a": "b", @@ -579,7 +587,9 @@ async def test_astream_events_from_model() -> None: }, { "data": { - "output": _any_id_ai_message_chunk(content="hello world!"), + "output": _any_id_ai_message_chunk( + content="hello world!", chunk_position="last" + ), }, "event": "on_chat_model_end", "metadata": { @@ -645,7 +655,11 @@ def i_dont_stream(value: Any, config: RunnableConfig) -> Any: "tags": ["my_model"], }, { - "data": {"chunk": _any_id_ai_message_chunk(content="hello")}, + "data": { + "chunk": _any_id_ai_message_chunk( + content="hello", + ) + }, "event": "on_chat_model_stream", "metadata": { "a": "b", @@ -671,7 +685,11 @@ def i_dont_stream(value: Any, config: RunnableConfig) -> Any: "tags": ["my_model"], }, { - "data": {"chunk": _any_id_ai_message_chunk(content="world!")}, + "data": { + "chunk": _any_id_ai_message_chunk( + content="world!", chunk_position="last" + ) + }, "event": "on_chat_model_stream", "metadata": { "a": "b", @@ -753,7 +771,11 @@ async def ai_dont_stream(value: Any, config: RunnableConfig) -> Any: "tags": ["my_model"], }, { - "data": {"chunk": _any_id_ai_message_chunk(content="hello")}, + "data": { + "chunk": _any_id_ai_message_chunk( + content="hello", + ) + }, "event": "on_chat_model_stream", 
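The expected events above come from streaming a ``GenericFakeChatModel``; as a plain synchronous sketch of the same terminal-chunk behavior (message text and prompt are placeholders, assuming ``langchain_core`` at this revision):

    from langchain_core.language_models.fake_chat_models import GenericFakeChatModel
    from langchain_core.messages import AIMessage, AIMessageChunk

    model = GenericFakeChatModel(messages=iter([AIMessage(content="hello world")]))

    full = None
    for chunk in model.stream("anything"):
        full = chunk if full is None else full + chunk

    assert isinstance(full, AIMessageChunk)
    # The final streamed chunk is flagged, and the flag survives aggregation.
    assert full.chunk_position == "last"
    assert full.text == "hello world"
    # Best-effort standardization of the aggregated string content.
    blocks = full.content_blocks
    assert blocks[0]["type"] == "text"
    assert blocks[0]["text"] == "hello world"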
"metadata": { "a": "b", @@ -779,7 +801,11 @@ async def ai_dont_stream(value: Any, config: RunnableConfig) -> Any: "tags": ["my_model"], }, { - "data": {"chunk": _any_id_ai_message_chunk(content="world!")}, + "data": { + "chunk": _any_id_ai_message_chunk( + content="world!", chunk_position="last" + ) + }, "event": "on_chat_model_stream", "metadata": { "a": "b", @@ -930,7 +956,12 @@ async def test_event_stream_with_simple_chain() -> None: "tags": ["my_chain", "my_model", "seq:step:2"], }, { - "data": {"chunk": AIMessageChunk(content="hello", id="ai1")}, + "data": { + "chunk": AIMessageChunk( + content="hello", + id="ai1", + ) + }, "event": "on_chat_model_stream", "metadata": { "a": "b", @@ -944,7 +975,12 @@ async def test_event_stream_with_simple_chain() -> None: "tags": ["my_chain", "my_model", "seq:step:2"], }, { - "data": {"chunk": AIMessageChunk(content="hello", id="ai1")}, + "data": { + "chunk": AIMessageChunk( + content="hello", + id="ai1", + ) + }, "event": "on_chain_stream", "metadata": {"foo": "bar"}, "name": "my_chain", @@ -976,7 +1012,11 @@ async def test_event_stream_with_simple_chain() -> None: "tags": ["my_chain"], }, { - "data": {"chunk": AIMessageChunk(content="world!", id="ai1")}, + "data": { + "chunk": AIMessageChunk( + content="world!", id="ai1", chunk_position="last" + ) + }, "event": "on_chat_model_stream", "metadata": { "a": "b", @@ -990,7 +1030,11 @@ async def test_event_stream_with_simple_chain() -> None: "tags": ["my_chain", "my_model", "seq:step:2"], }, { - "data": {"chunk": AIMessageChunk(content="world!", id="ai1")}, + "data": { + "chunk": AIMessageChunk( + content="world!", id="ai1", chunk_position="last" + ) + }, "event": "on_chain_stream", "metadata": {"foo": "bar"}, "name": "my_chain", @@ -1008,7 +1052,9 @@ async def test_event_stream_with_simple_chain() -> None: ] ] }, - "output": AIMessageChunk(content="hello world!", id="ai1"), + "output": AIMessageChunk( + content="hello world!", id="ai1", chunk_position="last" + ), }, "event": "on_chat_model_end", "metadata": { @@ -1023,7 +1069,11 @@ async def test_event_stream_with_simple_chain() -> None: "tags": ["my_chain", "my_model", "seq:step:2"], }, { - "data": {"output": AIMessageChunk(content="hello world!", id="ai1")}, + "data": { + "output": AIMessageChunk( + content="hello world!", id="ai1", chunk_position="last" + ) + }, "event": "on_chain_end", "metadata": {"foo": "bar"}, "name": "my_chain", @@ -1805,7 +1855,12 @@ async def test_events_astream_config() -> None: "tags": [], }, { - "data": {"chunk": AIMessageChunk(content="Goodbye", id="ai2")}, + "data": { + "chunk": AIMessageChunk( + content="Goodbye", + id="ai2", + ) + }, "event": "on_chat_model_stream", "metadata": {"ls_model_type": "chat"}, "name": "GenericFakeChatModel", @@ -1823,7 +1878,11 @@ async def test_events_astream_config() -> None: "tags": [], }, { - "data": {"chunk": AIMessageChunk(content="world", id="ai2")}, + "data": { + "chunk": AIMessageChunk( + content="world", id="ai2", chunk_position="last" + ) + }, "event": "on_chat_model_stream", "metadata": {"ls_model_type": "chat"}, "name": "GenericFakeChatModel", @@ -1833,7 +1892,9 @@ async def test_events_astream_config() -> None: }, { "data": { - "output": AIMessageChunk(content="Goodbye world", id="ai2"), + "output": AIMessageChunk( + content="Goodbye world", id="ai2", chunk_position="last" + ), }, "event": "on_chat_model_end", "metadata": {"ls_model_type": "chat"}, diff --git a/libs/langchain/tests/unit_tests/agents/test_agent.py b/libs/langchain/tests/unit_tests/agents/test_agent.py index 
36045d82a0d99..d5b29a74ca8bb 100644 --- a/libs/langchain/tests/unit_tests/agents/test_agent.py +++ b/libs/langchain/tests/unit_tests/agents/test_agent.py @@ -908,6 +908,7 @@ def find_pet(pet: str) -> str: "name": "find_pet", }, }, + "chunk_position": "last", "content": "", "example": False, "invalid_tool_calls": [], @@ -1112,6 +1113,7 @@ def check_time() -> str: }, ], }, + chunk_position="last", ), ], tool_call_id="0", @@ -1138,6 +1140,7 @@ def check_time() -> str: }, ], }, + chunk_position="last", ), ], }, @@ -1168,6 +1171,7 @@ def check_time() -> str: }, ], }, + chunk_position="last", ), ], tool_call_id="1", @@ -1194,6 +1198,7 @@ def check_time() -> str: }, ], }, + chunk_position="last", ), ], }, @@ -1231,6 +1236,7 @@ def check_time() -> str: }, ], }, + chunk_position="last", ), ], tool_call_id="0", @@ -1273,6 +1279,7 @@ def check_time() -> str: }, ], }, + chunk_position="last", ), ], tool_call_id="1", diff --git a/libs/langchain/tests/unit_tests/llms/fake_chat_model.py b/libs/langchain/tests/unit_tests/llms/fake_chat_model.py index c4f2670fb865e..3ec5a7924eaf7 100644 --- a/libs/langchain/tests/unit_tests/llms/fake_chat_model.py +++ b/libs/langchain/tests/unit_tests/llms/fake_chat_model.py @@ -130,10 +130,16 @@ def _stream( assert isinstance(content, str) content_chunks = cast("list[str]", re.split(r"(\s)", content)) - for token in content_chunks: + for idx, token in enumerate(content_chunks): chunk = ChatGenerationChunk( message=AIMessageChunk(id=message.id, content=token), ) + if ( + idx == len(content_chunks) - 1 + and isinstance(chunk.message, AIMessageChunk) + and not message.additional_kwargs + ): + chunk.message.chunk_position = "last" if run_manager: run_manager.on_llm_new_token(token, chunk=chunk) yield chunk diff --git a/libs/langchain/tests/unit_tests/llms/test_fake_chat_model.py b/libs/langchain/tests/unit_tests/llms/test_fake_chat_model.py index e5e8de87f0fcb..4bcb363d8f48f 100644 --- a/libs/langchain/tests/unit_tests/llms/test_fake_chat_model.py +++ b/libs/langchain/tests/unit_tests/llms/test_fake_chat_model.py @@ -49,14 +49,14 @@ async def test_generic_fake_chat_model_stream() -> None: assert chunks == [ _AnyIdAIMessageChunk(content="hello"), _AnyIdAIMessageChunk(content=" "), - _AnyIdAIMessageChunk(content="goodbye"), + _AnyIdAIMessageChunk(content="goodbye", chunk_position="last"), ] chunks = list(model.stream("meow")) assert chunks == [ _AnyIdAIMessageChunk(content="hello"), _AnyIdAIMessageChunk(content=" "), - _AnyIdAIMessageChunk(content="goodbye"), + _AnyIdAIMessageChunk(content="goodbye", chunk_position="last"), ] # Test streaming of additional kwargs. 
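The fake-model test updates above hinge on how chunk merging treats the terminal chunk. A minimal sketch, mirroring ``test_content_blocks`` from earlier in this patch (values illustrative), of tool call chunks resolving once a chunk marked ``chunk_position="last"`` is merged in:

    from langchain_core.messages import AIMessageChunk

    first = AIMessageChunk(
        content="",
        tool_call_chunks=[
            {"type": "tool_call_chunk", "name": "foo", "args": '{"foo": "b', "id": "abc_123", "index": 0}
        ],
    )
    second = AIMessageChunk(
        content="",
        tool_call_chunks=[
            {"type": "tool_call_chunk", "name": "", "args": 'ar"}', "id": "abc_123", "index": 0}
        ],
    )
    # Chat models now emit a final empty chunk flagged as the end of the stream.
    last = AIMessageChunk(content="", chunk_position="last")

    full = first + second + last
    assert full.chunk_position == "last"
    # Partial JSON accumulated across chunks is parsed into a complete tool call.
    assert full.tool_calls[0]["name"] == "foo"
    assert full.tool_calls[0]["args"] == {"foo": "bar"}
    # And content_blocks reports parsed tool calls rather than raw chunks.
    assert full.content_blocks == full.tool_calls

The terminal marker is what lets downstream consumers distinguish a fully aggregated message from an arbitrary partial sum of chunks.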
@@ -67,6 +67,7 @@ async def test_generic_fake_chat_model_stream() -> None: assert chunks == [ _AnyIdAIMessageChunk(content="", additional_kwargs={"foo": 42}), _AnyIdAIMessageChunk(content="", additional_kwargs={"bar": 24}), + _AnyIdAIMessageChunk(content="", chunk_position="last"), ] message = AIMessage( @@ -108,6 +109,7 @@ async def test_generic_fake_chat_model_stream() -> None: "function_call": {"arguments": '\n "destination_path": "bar"\n}'}, }, ), + _AnyIdAIMessageChunk(content="", chunk_position="last"), ] accumulate_chunks = None @@ -127,6 +129,7 @@ async def test_generic_fake_chat_model_stream() -> None: 'destination_path": "bar"\n}', }, }, + chunk_position="last", ) @@ -141,7 +144,7 @@ async def test_generic_fake_chat_model_astream_log() -> None: assert final.state["streamed_output"] == [ _AnyIdAIMessageChunk(content="hello"), _AnyIdAIMessageChunk(content=" "), - _AnyIdAIMessageChunk(content="goodbye"), + _AnyIdAIMessageChunk(content="goodbye", chunk_position="last"), ] @@ -198,6 +201,6 @@ async def on_llm_new_token( assert results == [ _AnyIdAIMessageChunk(content="hello"), _AnyIdAIMessageChunk(content=" "), - _AnyIdAIMessageChunk(content="goodbye"), + _AnyIdAIMessageChunk(content="goodbye", chunk_position="last"), ] assert tokens == ["hello", " ", "goodbye"] diff --git a/libs/partners/anthropic/langchain_anthropic/chat_models.py b/libs/partners/anthropic/langchain_anthropic/chat_models.py index 18651fb5f2c03..9dd6b6a00b215 100644 --- a/libs/partners/anthropic/langchain_anthropic/chat_models.py +++ b/libs/partners/anthropic/langchain_anthropic/chat_models.py @@ -2432,6 +2432,8 @@ def _make_message_chunk_from_anthropic_event( "stop_sequence": event.delta.stop_sequence, }, ) + if message_chunk.response_metadata.get("stop_reason"): + message_chunk.chunk_position = "last" # Unhandled event types (e.g., `content_block_stop`, `ping` events) # https://docs.anthropic.com/en/docs/build-with-claude/streaming#other-events else: diff --git a/libs/partners/anthropic/tests/integration_tests/test_chat_models.py b/libs/partners/anthropic/tests/integration_tests/test_chat_models.py index 6181f008abad1..f18e8240a9aba 100644 --- a/libs/partners/anthropic/tests/integration_tests/test_chat_models.py +++ b/libs/partners/anthropic/tests/integration_tests/test_chat_models.py @@ -482,7 +482,7 @@ def test_tool_use() -> None: assert len(content_blocks) == 2 assert content_blocks[0]["type"] == "text" assert content_blocks[0]["text"] - assert content_blocks[1]["type"] == "tool_call_chunk" + assert content_blocks[1]["type"] == "tool_call" assert content_blocks[1]["name"] == "get_weather" assert content_blocks[1]["args"] diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index e409713b54d98..769c3920a4818 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -938,7 +938,6 @@ def _convert_chunk_to_generation_chunk( generation_info = {**base_generation_info} if base_generation_info else {} if finish_reason := choice.get("finish_reason"): - generation_info["model_provider"] = "openai" generation_info["finish_reason"] = finish_reason if model_name := chunk.get("model"): generation_info["model_name"] = model_name @@ -946,6 +945,8 @@ def _convert_chunk_to_generation_chunk( generation_info["system_fingerprint"] = system_fingerprint if service_tier := chunk.get("service_tier"): generation_info["service_tier"] = service_tier + if isinstance(message_chunk, 
AIMessageChunk): + message_chunk.chunk_position = "last" logprobs = choice.get("logprobs") if logprobs: @@ -954,6 +955,7 @@ def _convert_chunk_to_generation_chunk( if usage_metadata and isinstance(message_chunk, AIMessageChunk): message_chunk.usage_metadata = usage_metadata + message_chunk.response_metadata["model_provider"] = "openai" generation_chunk = ChatGenerationChunk( message=message_chunk, generation_info=generation_info or None ) @@ -4157,6 +4159,7 @@ def _advance(output_idx: int, sub_idx: Optional[int] = None) -> None: response_metadata = {} response_metadata["model_provider"] = "openai" usage_metadata = None + chunk_position: Optional[Literal["last"]] = None id = None if chunk.type == "response.output_text.delta": _advance(chunk.output_index, chunk.content_index) @@ -4194,6 +4197,7 @@ def _advance(output_idx: int, sub_idx: Optional[int] = None) -> None: response_metadata = { k: v for k, v in msg.response_metadata.items() if k != "id" } + chunk_position = "last" elif chunk.type == "response.output_item.added" and chunk.item.type == "message": if output_version == "v0": id = chunk.item.id @@ -4311,6 +4315,7 @@ def _advance(output_idx: int, sub_idx: Optional[int] = None) -> None: response_metadata=response_metadata, additional_kwargs=additional_kwargs, id=id, + chunk_position=chunk_position, ) if output_version == "v0": message = cast( diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_base.py b/libs/partners/openai/tests/integration_tests/chat_models/test_base.py index b914af07cba3e..9c1979226458b 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_base.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_base.py @@ -253,7 +253,9 @@ def test_stream() -> None: assert isinstance(chunk, AIMessageChunk) if chunk.usage_metadata is not None: chunks_with_token_counts += 1 - if chunk.response_metadata: + if chunk.response_metadata and not set(chunk.response_metadata.keys()).issubset( + {"model_provider", "output_version"} + ): chunks_with_response_metadata += 1 if chunks_with_token_counts != 1 or chunks_with_response_metadata != 1: raise AssertionError( @@ -281,7 +283,9 @@ async def _test_stream(stream: AsyncIterator, expect_usage: bool) -> None: assert isinstance(chunk, AIMessageChunk) if chunk.usage_metadata is not None: chunks_with_token_counts += 1 - if chunk.response_metadata: + if chunk.response_metadata and not set( + chunk.response_metadata.keys() + ).issubset({"model_provider", "output_version"}): chunks_with_response_metadata += 1 assert isinstance(full, AIMessageChunk) if chunks_with_response_metadata != 1: @@ -449,6 +453,7 @@ def test_tool_use() -> None: assert len(gathered.tool_call_chunks) == 1 tool_call_chunk = gathered.tool_call_chunks[0] assert "args" in tool_call_chunk + assert gathered.content_blocks == gathered.tool_calls streaming_tool_msg = ToolMessage( "sally_green_hair", diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py index bd9b83752a289..bb58e66d29b94 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py @@ -163,6 +163,11 @@ def multiply(x: int, y: int) -> int: assert full.tool_calls[0]["name"] == "multiply" assert set(full.tool_calls[0]["args"]) == {"x", "y"} + if output_version in ("responses/v1", "v1"): + for msg in [ai_msg, full]: + assert 
len(msg.content_blocks) == 1 + assert msg.content_blocks[0]["type"] == "tool_call" + response = bound_llm.invoke("What was a positive news story from today?") _check_response(response) From c63c3ea29052f4be0b85772f8b7e7a021432f90e Mon Sep 17 00:00:00 2001 From: ccurme Date: Tue, 26 Aug 2025 14:29:20 -0300 Subject: [PATCH 50/73] feat(core): (v1) add sentinel value to `output_version` (#32692) --- .../language_models/chat_models.py | 4 +-- .../language_models/chat_models/test_cache.py | 5 ++-- .../__snapshots__/test_runnable.ambr | 28 +++++++++---------- .../tests/unit_tests/chat_models/test_base.py | 4 +-- .../chat_models/test_chat_models.py | 4 +-- .../__snapshots__/test_standard.ambr | 1 - .../langchain_openai/chat_models/base.py | 20 +++++++++++-- .../__snapshots__/test_azure_standard.ambr | 1 - .../__snapshots__/test_base_standard.ambr | 1 - .../test_responses_standard.ambr | 1 - .../test_chat_models_standard.ambr | 1 - 11 files changed, 39 insertions(+), 31 deletions(-) diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py index c08d73c33954e..365fc4a1ba33d 100644 --- a/libs/core/langchain_core/language_models/chat_models.py +++ b/libs/core/langchain_core/language_models/chat_models.py @@ -337,8 +337,8 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC): """ - output_version: str = Field( - default_factory=from_env("LC_OUTPUT_VERSION", default="v0") + output_version: Optional[str] = Field( + default_factory=from_env("LC_OUTPUT_VERSION", default=None) ) """Version of ``AIMessage`` output format to store in message content. diff --git a/libs/core/tests/unit_tests/language_models/chat_models/test_cache.py b/libs/core/tests/unit_tests/language_models/chat_models/test_cache.py index 7cf428bb3ab15..39e4babc7821f 100644 --- a/libs/core/tests/unit_tests/language_models/chat_models/test_cache.py +++ b/libs/core/tests/unit_tests/language_models/chat_models/test_cache.py @@ -301,9 +301,8 @@ def test_llm_representation_for_serializable() -> None: assert chat._get_llm_string() == ( '{"id": ["tests", "unit_tests", "language_models", "chat_models", ' '"test_cache", "CustomChat"], "kwargs": {"messages": {"id": ' - '["builtins", "list_iterator"], "lc": 1, "type": "not_implemented"}, ' - '"output_version": "v0"}, "lc": 1, "name": "CustomChat", "type": ' - "\"constructor\"}---[('stop', None)]" + '["builtins", "list_iterator"], "lc": 1, "type": "not_implemented"}}, "lc": ' + '1, "name": "CustomChat", "type": "constructor"}---[(\'stop\', None)]' ) diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr index b5348ce867d02..fe1b5884ed626 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr @@ -97,7 +97,7 @@ "fake_chat_models", "FakeListChatModel" ], - "repr": "FakeListChatModel(output_version='v0', responses=['foo, bar'])", + "repr": "FakeListChatModel(responses=['foo, bar'])", "name": "FakeListChatModel" } ], @@ -227,7 +227,7 @@ "fake_chat_models", "FakeListChatModel" ], - "repr": "FakeListChatModel(output_version='v0', responses=['baz, qux'])", + "repr": "FakeListChatModel(responses=['baz, qux'])", "name": "FakeListChatModel" } ], @@ -346,7 +346,7 @@ "fake_chat_models", "FakeListChatModel" ], - "repr": "FakeListChatModel(output_version='v0', responses=['foo, bar'])", + "repr": "FakeListChatModel(responses=['foo, bar'])", 
"name": "FakeListChatModel" }, { @@ -457,7 +457,7 @@ "fake_chat_models", "FakeListChatModel" ], - "repr": "FakeListChatModel(output_version='v0', responses=['baz, qux'])", + "repr": "FakeListChatModel(responses=['baz, qux'])", "name": "FakeListChatModel" } ], @@ -1009,7 +1009,7 @@ # name: test_prompt_with_chat_model ''' ChatPromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='You are a nice assistant.'), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, template='{question}'), additional_kwargs={})]) - | FakeListChatModel(output_version='v0', responses=['foo']) + | FakeListChatModel(responses=['foo']) ''' # --- # name: test_prompt_with_chat_model.1 @@ -1109,7 +1109,7 @@ "fake_chat_models", "FakeListChatModel" ], - "repr": "FakeListChatModel(output_version='v0', responses=['foo'])", + "repr": "FakeListChatModel(responses=['foo'])", "name": "FakeListChatModel" } }, @@ -1220,7 +1220,7 @@ "fake_chat_models", "FakeListChatModel" ], - "repr": "FakeListChatModel(output_version='v0', responses=['foo, bar'])", + "repr": "FakeListChatModel(responses=['foo, bar'])", "name": "FakeListChatModel" } ], @@ -1249,7 +1249,7 @@ # name: test_prompt_with_chat_model_async ''' ChatPromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='You are a nice assistant.'), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, template='{question}'), additional_kwargs={})]) - | FakeListChatModel(output_version='v0', responses=['foo']) + | FakeListChatModel(responses=['foo']) ''' # --- # name: test_prompt_with_chat_model_async.1 @@ -1349,7 +1349,7 @@ "fake_chat_models", "FakeListChatModel" ], - "repr": "FakeListChatModel(output_version='v0', responses=['foo'])", + "repr": "FakeListChatModel(responses=['foo'])", "name": "FakeListChatModel" } }, @@ -13775,7 +13775,7 @@ just_to_test_lambda: RunnableLambda(...) 
} | ChatPromptTemplate(input_variables=['documents', 'question'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='You are a nice assistant.'), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['documents', 'question'], input_types={}, partial_variables={}, template='Context:\n{documents}\n\nQuestion:\n{question}'), additional_kwargs={})]) - | FakeListChatModel(output_version='v0', responses=['foo, bar']) + | FakeListChatModel(responses=['foo, bar']) | CommaSeparatedListOutputParser() ''' # --- @@ -13978,7 +13978,7 @@ "fake_chat_models", "FakeListChatModel" ], - "repr": "FakeListChatModel(output_version='v0', responses=['foo, bar'])", + "repr": "FakeListChatModel(responses=['foo, bar'])", "name": "FakeListChatModel" } ], @@ -14004,7 +14004,7 @@ ChatPromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='You are a nice assistant.'), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, template='{question}'), additional_kwargs={})]) | RunnableLambda(...) | { - chat: FakeListChatModel(output_version='v0', responses=["i'm a chatbot"]), + chat: FakeListChatModel(responses=["i'm a chatbot"]), llm: FakeListLLM(responses=["i'm a textbot"]) } ''' @@ -14130,7 +14130,7 @@ "fake_chat_models", "FakeListChatModel" ], - "repr": "FakeListChatModel(output_version='v0', responses=[\"i'm a chatbot\"])", + "repr": "FakeListChatModel(responses=[\"i'm a chatbot\"])", "name": "FakeListChatModel" }, "llm": { @@ -14285,7 +14285,7 @@ "fake_chat_models", "FakeListChatModel" ], - "repr": "FakeListChatModel(output_version='v0', responses=[\"i'm a chatbot\"])", + "repr": "FakeListChatModel(responses=[\"i'm a chatbot\"])", "name": "FakeListChatModel" }, "kwargs": { diff --git a/libs/langchain/tests/unit_tests/chat_models/test_base.py b/libs/langchain/tests/unit_tests/chat_models/test_base.py index 6c8df01254070..1ae035047065a 100644 --- a/libs/langchain/tests/unit_tests/chat_models/test_base.py +++ b/libs/langchain/tests/unit_tests/chat_models/test_base.py @@ -145,7 +145,7 @@ def test_configurable() -> None: "openai_api_base": None, "openai_organization": None, "openai_proxy": None, - "output_version": "v0", + "output_version": None, "request_timeout": None, "max_retries": None, "presence_penalty": None, @@ -277,7 +277,7 @@ def test_configurable_with_default() -> None: "model_kwargs": {}, "streaming": False, "stream_usage": True, - "output_version": "v0", + "output_version": None, }, "kwargs": { "tools": [{"name": "foo", "description": "foo", "input_schema": {}}], diff --git a/libs/langchain_v1/tests/unit_tests/chat_models/test_chat_models.py b/libs/langchain_v1/tests/unit_tests/chat_models/test_chat_models.py index 369b74bc7e507..3dfe3a6d6ee2c 100644 --- a/libs/langchain_v1/tests/unit_tests/chat_models/test_chat_models.py +++ b/libs/langchain_v1/tests/unit_tests/chat_models/test_chat_models.py @@ -145,7 +145,7 @@ def test_configurable() -> None: "openai_api_base": None, "openai_organization": None, "openai_proxy": None, - "output_version": "v0", + "output_version": None, "request_timeout": None, "max_retries": None, "presence_penalty": None, @@ -277,7 +277,7 @@ def test_configurable_with_default() -> None: "model_kwargs": {}, 
"streaming": False, "stream_usage": True, - "output_version": "v0", + "output_version": None, }, "kwargs": { "tools": [{"name": "foo", "description": "foo", "input_schema": {}}], diff --git a/libs/partners/anthropic/tests/unit_tests/__snapshots__/test_standard.ambr b/libs/partners/anthropic/tests/unit_tests/__snapshots__/test_standard.ambr index 5c9164caae365..b831aef469b44 100644 --- a/libs/partners/anthropic/tests/unit_tests/__snapshots__/test_standard.ambr +++ b/libs/partners/anthropic/tests/unit_tests/__snapshots__/test_standard.ambr @@ -20,7 +20,6 @@ 'max_retries': 2, 'max_tokens': 100, 'model': 'claude-3-haiku-20240307', - 'output_version': 'v0', 'stop_sequences': list([ ]), 'stream_usage': True, diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index 769c3920a4818..4e9cd9f6baa35 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -706,7 +706,9 @@ class BaseChatOpenAI(BaseChatModel): .. versionadded:: 0.3.9 """ - output_version: str = "v0" + output_version: Optional[str] = Field( + default_factory=from_env("LC_OUTPUT_VERSION", default=None) + ) """Version of AIMessage output format to use. This field is used to roll-out new output formats for chat model AIMessages @@ -3938,12 +3940,18 @@ def _construct_lc_result_from_responses_api( response: Response, schema: Optional[type[_BM]] = None, metadata: Optional[dict] = None, - output_version: str = "v0", + output_version: Optional[str] = None, ) -> ChatResult: """Construct ChatResponse from OpenAI Response API response.""" if response.error: raise ValueError(response.error) + if output_version is None: + # Sentinel value of None lets us know if output_version is set explicitly. + # Explicitly setting `output_version="responses/v1"` separately enables the + # Responses API. + output_version = "v0" + response_metadata = { k: v for k, v in response.model_dump(exclude_none=True, mode="json").items() @@ -4103,7 +4111,7 @@ def _convert_responses_chunk_to_generation_chunk( schema: Optional[type[_BM]] = None, metadata: Optional[dict] = None, has_reasoning: bool = False, - output_version: str = "v0", + output_version: Optional[str] = None, ) -> tuple[int, int, int, Optional[ChatGenerationChunk]]: def _advance(output_idx: int, sub_idx: Optional[int] = None) -> None: """Advance indexes tracked during streaming. @@ -4150,6 +4158,12 @@ def _advance(output_idx: int, sub_idx: Optional[int] = None) -> None: current_sub_index = sub_idx current_output_index = output_idx + if output_version is None: + # Sentinel value of None lets us know if output_version is set explicitly. + # Explicitly setting `output_version="responses/v1"` separately enables the + # Responses API. 
+ output_version = "v0" + content = [] tool_call_chunks: list = [] additional_kwargs: dict = {} diff --git a/libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_azure_standard.ambr b/libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_azure_standard.ambr index ddadd6fc09be9..2060512958a9f 100644 --- a/libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_azure_standard.ambr +++ b/libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_azure_standard.ambr @@ -24,7 +24,6 @@ }), 'openai_api_type': 'azure', 'openai_api_version': '2021-10-01', - 'output_version': 'v0', 'request_timeout': 60.0, 'stop': list([ ]), diff --git a/libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_base_standard.ambr b/libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_base_standard.ambr index 1a74f4978a70d..e7307c6158fbc 100644 --- a/libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_base_standard.ambr +++ b/libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_base_standard.ambr @@ -18,7 +18,6 @@ 'lc': 1, 'type': 'secret', }), - 'output_version': 'v0', 'request_timeout': 60.0, 'stop': list([ ]), diff --git a/libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_responses_standard.ambr b/libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_responses_standard.ambr index 10d1355af4e46..88a49a2750227 100644 --- a/libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_responses_standard.ambr +++ b/libs/partners/openai/tests/unit_tests/chat_models/__snapshots__/test_responses_standard.ambr @@ -18,7 +18,6 @@ 'lc': 1, 'type': 'secret', }), - 'output_version': 'v0', 'request_timeout': 60.0, 'stop': list([ ]), diff --git a/libs/partners/xai/tests/unit_tests/__snapshots__/test_chat_models_standard.ambr b/libs/partners/xai/tests/unit_tests/__snapshots__/test_chat_models_standard.ambr index e61b99508aa14..4cd1261555c90 100644 --- a/libs/partners/xai/tests/unit_tests/__snapshots__/test_chat_models_standard.ambr +++ b/libs/partners/xai/tests/unit_tests/__snapshots__/test_chat_models_standard.ambr @@ -10,7 +10,6 @@ 'max_retries': 2, 'max_tokens': 100, 'model_name': 'grok-beta', - 'output_version': 'v0', 'request_timeout': 60.0, 'stop': list([ ]), From 518f4dfccb7fabf189b6fa04b050ae7616cda0a0 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Tue, 26 Aug 2025 13:54:04 -0400 Subject: [PATCH 51/73] . 
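This commit threads an optional per-invocation output_version through invoke, ainvoke, stream, and astream: the keyword overrides the model-level setting for that call, falls back to the configured value (or "v0") when omitted, and the effective version is recorded in the message's additional_kwargs. A minimal usage sketch, assuming the stock GenericFakeChatModel and the behavior the new unit tests expect:

from itertools import cycle

from langchain_core.language_models.fake_chat_models import GenericFakeChatModel
from langchain_core.messages import AIMessage

# Model-level default used when no per-call override is supplied.
model = GenericFakeChatModel(
    messages=cycle([AIMessage(content="hello")]), output_version="v0"
)

default_msg = model.invoke("hi")                        # effective version: "v0"
override_msg = model.invoke("hi", output_version="v1")  # per-call override

# The effective version is stamped on the message for downstream consumers.
assert default_msg.additional_kwargs["output_version"] == "v0"
assert override_msg.additional_kwargs["output_version"] == "v1"

The companion change to merge_dicts makes disagreeing output_version values across chunks raise rather than concatenate, so a mixed-version stream fails loudly during accumulation.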
--- .../language_models/chat_models.py | 184 +++++++- libs/core/langchain_core/utils/_merge.py | 14 +- .../unit_tests/fake/test_fake_chat_model.py | 116 +++-- .../language_models/chat_models/test_base.py | 49 +- .../chat_models/test_output_version.py | 341 ++++++++++++++ .../chat_models/test_rate_limiting.py | 3 +- .../__snapshots__/test_runnable.ambr | 442 ------------------ .../unit_tests/runnables/test_runnable.py | 12 +- .../runnables/test_runnable_events_v1.py | 8 +- .../runnables/test_runnable_events_v2.py | 8 +- 10 files changed, 672 insertions(+), 505 deletions(-) create mode 100644 libs/core/tests/unit_tests/language_models/chat_models/test_output_version.py diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py index c08d73c33954e..c139f63aac18f 100644 --- a/libs/core/langchain_core/language_models/chat_models.py +++ b/libs/core/langchain_core/language_models/chat_models.py @@ -419,9 +419,29 @@ def invoke( config: Optional[RunnableConfig] = None, *, stop: Optional[list[str]] = None, + output_version: Optional[str] = None, **kwargs: Any, ) -> AIMessage: + """Invoke the chat model. + + Args: + input: The input to the chat model. + config: The config to use for this run. + stop: Stop words to use when generating. + output_version: Override the model's ``output_version`` for this invocation. + If None, uses the model's configured ``output_version``. + **kwargs: Additional keyword arguments. + + Returns: + The model's response message. + """ config = ensure_config(config) + + effective_output_version = ( + output_version if output_version is not None else self.output_version + ) + kwargs["_output_version"] = effective_output_version + return cast( "AIMessage", cast( @@ -446,9 +466,29 @@ async def ainvoke( config: Optional[RunnableConfig] = None, *, stop: Optional[list[str]] = None, + output_version: Optional[str] = None, **kwargs: Any, ) -> AIMessage: + """Asynchronously invoke the chat model. + + Args: + input: The input to the chat model. + config: The config to use for this run. + stop: Stop words to use when generating. + output_version: Override the model's ``output_version`` for this invocation. + If None, uses the model's configured ``output_version``. + **kwargs: Additional keyword arguments. + + Returns: + The model's response message. + """ config = ensure_config(config) + + effective_output_version = ( + output_version if output_version is not None else self.output_version + ) + kwargs["_output_version"] = effective_output_version + llm_result = await self.agenerate_prompt( [self._convert_input(input)], stop=stop, @@ -505,13 +545,38 @@ def stream( config: Optional[RunnableConfig] = None, *, stop: Optional[list[str]] = None, + output_version: Optional[str] = None, **kwargs: Any, ) -> Iterator[AIMessageChunk]: + """Stream responses from the chat model. + + Args: + input: The input to the chat model. + config: The config to use for this run. + stop: Stop words to use when generating. + output_version: Override the model's ``output_version`` for this invocation. + If None, uses the model's configured ``output_version``. + **kwargs: Additional keyword arguments. + + Returns: + Iterator of message chunks. 
+ """ + effective_output_version = ( + output_version if output_version is not None else self.output_version + ) + kwargs["_output_version"] = effective_output_version + if not self._should_stream(async_api=False, **{**kwargs, "stream": True}): # model doesn't implement streaming, so use default implementation yield cast( "AIMessageChunk", - self.invoke(input, config=config, stop=stop, **kwargs), + self.invoke( + input, + config=config, + stop=stop, + output_version=effective_output_version, + **kwargs, + ), ) else: config = ensure_config(config) @@ -557,11 +622,21 @@ def stream( input_messages = _normalize_messages(messages) run_id = "-".join((LC_ID_PREFIX, str(run_manager.run_id))) yielded = False - for chunk in self._stream(input_messages, stop=stop, **kwargs): + for chunk in self._stream( + input_messages, + stop=stop, + output_version=kwargs["_output_version"], + **kwargs, + ): if chunk.message.id is None: chunk.message.id = run_id chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk) - if self.output_version == "v1": + output_version = kwargs["_output_version"] + if isinstance(chunk.message, (AIMessage, AIMessageChunk)): + chunk.message.additional_kwargs["output_version"] = ( + output_version + ) + if output_version == "v1": # Overwrite .content with .content_blocks chunk.message = _update_message_content_to_blocks( chunk.message, "v1" @@ -617,13 +692,38 @@ async def astream( config: Optional[RunnableConfig] = None, *, stop: Optional[list[str]] = None, + output_version: Optional[str] = None, **kwargs: Any, ) -> AsyncIterator[AIMessageChunk]: + """Asynchronously stream responses from the chat model. + + Args: + input: The input to the chat model. + config: The config to use for this run. + stop: Stop words to use when generating. + output_version: Override the model's ``output_version`` for this invocation. + If None, uses the model's configured ``output_version``. + **kwargs: Additional keyword arguments. + + Returns: + Async iterator of message chunks. 
+ """ + effective_output_version = ( + output_version if output_version is not None else self.output_version + ) + kwargs["_output_version"] = effective_output_version + if not self._should_stream(async_api=True, **{**kwargs, "stream": True}): # No async or sync stream is implemented, so fall back to ainvoke yield cast( "AIMessageChunk", - await self.ainvoke(input, config=config, stop=stop, **kwargs), + await self.ainvoke( + input, + config=config, + stop=stop, + output_version=effective_output_version, + **kwargs, + ), ) return @@ -674,12 +774,16 @@ async def astream( async for chunk in self._astream( input_messages, stop=stop, + output_version=kwargs["_output_version"], **kwargs, ): if chunk.message.id is None: chunk.message.id = run_id chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk) - if self.output_version == "v1": + output_version = kwargs["_output_version"] + if isinstance(chunk.message, (AIMessage, AIMessageChunk)): + chunk.message.additional_kwargs["output_version"] = output_version + if output_version == "v1": # Overwrite .content with .content_blocks chunk.message = _update_message_content_to_blocks( chunk.message, "v1" @@ -707,7 +811,10 @@ async def astream( generations_with_error_metadata = _generate_response_from_error(e) chat_generation_chunk = merge_chat_generation_chunks(chunks) if chat_generation_chunk: - generations = [[chat_generation_chunk], generations_with_error_metadata] + generations = [ + [chat_generation_chunk], + generations_with_error_metadata, + ] else: generations = [generations_with_error_metadata] await run_manager.on_llm_error( @@ -1143,6 +1250,8 @@ def _generate_with_cache( if self.rate_limiter: self.rate_limiter.acquire(blocking=True) + output_version = kwargs.pop("_output_version", self.output_version) + # If stream is not explicitly set, check if implicitly requested by # astream_events() or astream_log(). 
Bail out if _stream not implemented if self._should_stream( @@ -1155,12 +1264,16 @@ def _generate_with_cache( f"{LC_ID_PREFIX}-{run_manager.run_id}" if run_manager else None ) yielded = False - for chunk in self._stream(messages, stop=stop, **kwargs): + for chunk in self._stream( + messages, stop=stop, output_version=output_version, **kwargs + ): chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk) + if isinstance(chunk.message, (AIMessage, AIMessageChunk)): + chunk.message.additional_kwargs["output_version"] = output_version if run_manager: if chunk.message.id is None: chunk.message.id = run_id - if self.output_version == "v1": + if output_version == "v1": # Overwrite .content with .content_blocks chunk.message = _update_message_content_to_blocks( chunk.message, "v1" @@ -1190,12 +1303,18 @@ def _generate_with_cache( result = generate_from_stream(iter(chunks)) elif inspect.signature(self._generate).parameters.get("run_manager"): result = self._generate( - messages, stop=stop, run_manager=run_manager, **kwargs + messages, + stop=stop, + run_manager=run_manager, + output_version=output_version, + **kwargs, ) else: - result = self._generate(messages, stop=stop, **kwargs) + result = self._generate( + messages, stop=stop, output_version=output_version, **kwargs + ) - if self.output_version == "v1": + if output_version == "v1": # Overwrite .content with .content_blocks for generation in result.generations: generation.message = _update_message_content_to_blocks( @@ -1209,6 +1328,8 @@ def _generate_with_cache( generation.message.response_metadata = _gen_info_and_msg_metadata( generation ) + if isinstance(generation.message, (AIMessage, AIMessageChunk)): + generation.message.additional_kwargs["output_version"] = output_version if len(result.generations) == 1 and result.llm_output is not None: result.generations[0].message.response_metadata = { **result.llm_output, @@ -1250,6 +1371,8 @@ async def _agenerate_with_cache( if self.rate_limiter: await self.rate_limiter.aacquire(blocking=True) + output_version = kwargs.pop("_output_version", self.output_version) + # If stream is not explicitly set, check if implicitly requested by # astream_events() or astream_log(). 
Bail out if _astream not implemented if self._should_stream( @@ -1262,12 +1385,16 @@ async def _agenerate_with_cache( f"{LC_ID_PREFIX}-{run_manager.run_id}" if run_manager else None ) yielded = False - async for chunk in self._astream(messages, stop=stop, **kwargs): + async for chunk in self._astream( + messages, stop=stop, output_version=output_version, **kwargs + ): chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk) + if isinstance(chunk.message, (AIMessage, AIMessageChunk)): + chunk.message.additional_kwargs["output_version"] = output_version if run_manager: if chunk.message.id is None: chunk.message.id = run_id - if self.output_version == "v1": + if output_version == "v1": # Overwrite .content with .content_blocks chunk.message = _update_message_content_to_blocks( chunk.message, "v1" @@ -1297,12 +1424,18 @@ async def _agenerate_with_cache( result = generate_from_stream(iter(chunks)) elif inspect.signature(self._agenerate).parameters.get("run_manager"): result = await self._agenerate( - messages, stop=stop, run_manager=run_manager, **kwargs + messages, + stop=stop, + run_manager=run_manager, + output_version=output_version, + **kwargs, ) else: - result = await self._agenerate(messages, stop=stop, **kwargs) + result = await self._agenerate( + messages, stop=stop, output_version=output_version, **kwargs + ) - if self.output_version == "v1": + if output_version == "v1": # Overwrite .content with .content_blocks for generation in result.generations: generation.message = _update_message_content_to_blocks( @@ -1316,6 +1449,8 @@ async def _agenerate_with_cache( generation.message.response_metadata = _gen_info_and_msg_metadata( generation ) + if isinstance(generation.message, (AIMessage, AIMessageChunk)): + generation.message.additional_kwargs["output_version"] = output_version if len(result.generations) == 1 and result.llm_output is not None: result.generations[0].message.response_metadata = { **result.llm_output, @@ -1331,15 +1466,20 @@ def _generate( messages: list[BaseMessage], stop: Optional[list[str]] = None, run_manager: Optional[CallbackManagerForLLMRun] = None, + *, + output_version: str = "v0", **kwargs: Any, ) -> ChatResult: """Top Level call.""" + # Concrete implementations should override this method and use the same params async def _agenerate( self, messages: list[BaseMessage], stop: Optional[list[str]] = None, run_manager: Optional[AsyncCallbackManagerForLLMRun] = None, + *, + output_version: str = "v0", **kwargs: Any, ) -> ChatResult: """Top Level call.""" @@ -1349,6 +1489,7 @@ async def _agenerate( messages, stop, run_manager.get_sync() if run_manager else None, + output_version=output_version, **kwargs, ) @@ -1357,6 +1498,8 @@ def _stream( messages: list[BaseMessage], stop: Optional[list[str]] = None, run_manager: Optional[CallbackManagerForLLMRun] = None, + *, + output_version: str = "v0", **kwargs: Any, ) -> Iterator[ChatGenerationChunk]: raise NotImplementedError @@ -1366,6 +1509,8 @@ async def _astream( messages: list[BaseMessage], stop: Optional[list[str]] = None, run_manager: Optional[AsyncCallbackManagerForLLMRun] = None, + *, + output_version: str = "v0", **kwargs: Any, ) -> AsyncIterator[ChatGenerationChunk]: iterator = await run_in_executor( @@ -1374,6 +1519,7 @@ async def _astream( messages, stop, run_manager.get_sync() if run_manager else None, + output_version=output_version, **kwargs, ) done = object() @@ -1720,6 +1866,9 @@ def _generate( messages: list[BaseMessage], stop: Optional[list[str]] = None, run_manager: 
Optional[CallbackManagerForLLMRun] = None, + *, + # For backward compatibility + output_version: str = "v0", # noqa: ARG002 **kwargs: Any, ) -> ChatResult: output_str = self._call(messages, stop=stop, run_manager=run_manager, **kwargs) @@ -1742,6 +1891,8 @@ async def _agenerate( messages: list[BaseMessage], stop: Optional[list[str]] = None, run_manager: Optional[AsyncCallbackManagerForLLMRun] = None, + *, + output_version: str = "v0", **kwargs: Any, ) -> ChatResult: return await run_in_executor( @@ -1750,6 +1901,7 @@ async def _agenerate( messages, stop=stop, run_manager=run_manager.get_sync() if run_manager else None, + output_version=output_version, **kwargs, ) diff --git a/libs/core/langchain_core/utils/_merge.py b/libs/core/langchain_core/utils/_merge.py index 7b8465e8d0256..db6b5665ade39 100644 --- a/libs/core/langchain_core/utils/_merge.py +++ b/libs/core/langchain_core/utils/_merge.py @@ -44,6 +44,17 @@ def merge_dicts(left: dict[str, Any], *others: dict[str, Any]) -> dict[str, Any] ) raise TypeError(msg) elif isinstance(merged[right_k], str): + # Special handling for output_version - it should be consistent + if right_k == "output_version": + if merged[right_k] == right_v: + continue + msg = ( + "Unable to merge. Two different values seen for " + f"'output_version': {merged[right_k]} and {right_v}. " + "'output_version' should have the same value across " + "all chunks in a generation." + ) + raise ValueError(msg) # TODO: Add below special handling for 'type' key in 0.3 and remove # merge_lists 'type' logic. # @@ -58,8 +69,7 @@ def merge_dicts(left: dict[str, Any], *others: dict[str, Any]) -> dict[str, Any] # "all dicts." # ) if (right_k == "index" and merged[right_k].startswith("lc_")) or ( - right_k in ("id", "output_version", "model_provider") - and merged[right_k] == right_v + right_k in ("id", "model_provider") and merged[right_k] == right_v ): continue merged[right_k] += right_v diff --git a/libs/core/tests/unit_tests/fake/test_fake_chat_model.py b/libs/core/tests/unit_tests/fake/test_fake_chat_model.py index 0e1944534d07d..ad4d7e3a00a40 100644 --- a/libs/core/tests/unit_tests/fake/test_fake_chat_model.py +++ b/libs/core/tests/unit_tests/fake/test_fake_chat_model.py @@ -28,11 +28,17 @@ def test_generic_fake_chat_model_invoke() -> None: infinite_cycle = cycle([AIMessage(content="hello"), AIMessage(content="goodbye")]) model = GenericFakeChatModel(messages=infinite_cycle) response = model.invoke("meow") - assert response == _any_id_ai_message(content="hello") + assert response == _any_id_ai_message( + content="hello", additional_kwargs={"output_version": "v0"} + ) response = model.invoke("kitty") - assert response == _any_id_ai_message(content="goodbye") + assert response == _any_id_ai_message( + content="goodbye", additional_kwargs={"output_version": "v0"} + ) response = model.invoke("meow") - assert response == _any_id_ai_message(content="hello") + assert response == _any_id_ai_message( + content="hello", additional_kwargs={"output_version": "v0"} + ) async def test_generic_fake_chat_model_ainvoke() -> None: @@ -40,11 +46,17 @@ async def test_generic_fake_chat_model_ainvoke() -> None: infinite_cycle = cycle([AIMessage(content="hello"), AIMessage(content="goodbye")]) model = GenericFakeChatModel(messages=infinite_cycle) response = await model.ainvoke("meow") - assert response == _any_id_ai_message(content="hello") + assert response == _any_id_ai_message( + content="hello", additional_kwargs={"output_version": "v0"} + ) response = await model.ainvoke("kitty") - assert response 
== _any_id_ai_message(content="goodbye") + assert response == _any_id_ai_message( + content="goodbye", additional_kwargs={"output_version": "v0"} + ) response = await model.ainvoke("meow") - assert response == _any_id_ai_message(content="hello") + assert response == _any_id_ai_message( + content="hello", additional_kwargs={"output_version": "v0"} + ) async def test_generic_fake_chat_model_stream() -> None: @@ -57,17 +69,33 @@ async def test_generic_fake_chat_model_stream() -> None: model = GenericFakeChatModel(messages=infinite_cycle) chunks = [chunk async for chunk in model.astream("meow")] assert chunks == [ - _any_id_ai_message_chunk(content="hello"), - _any_id_ai_message_chunk(content=" "), - _any_id_ai_message_chunk(content="goodbye", chunk_position="last"), + _any_id_ai_message_chunk( + content="hello", additional_kwargs={"output_version": "v0"} + ), + _any_id_ai_message_chunk( + content=" ", additional_kwargs={"output_version": "v0"} + ), + _any_id_ai_message_chunk( + content="goodbye", + chunk_position="last", + additional_kwargs={"output_version": "v0"}, + ), ] assert len({chunk.id for chunk in chunks}) == 1 chunks = list(model.stream("meow")) assert chunks == [ - _any_id_ai_message_chunk(content="hello"), - _any_id_ai_message_chunk(content=" "), - _any_id_ai_message_chunk(content="goodbye", chunk_position="last"), + _any_id_ai_message_chunk( + content="hello", additional_kwargs={"output_version": "v0"} + ), + _any_id_ai_message_chunk( + content=" ", additional_kwargs={"output_version": "v0"} + ), + _any_id_ai_message_chunk( + content="goodbye", + chunk_position="last", + additional_kwargs={"output_version": "v0"}, + ), ] assert len({chunk.id for chunk in chunks}) == 1 @@ -77,8 +105,12 @@ async def test_generic_fake_chat_model_stream() -> None: model = GenericFakeChatModel(messages=cycle([message])) chunks = [chunk async for chunk in model.astream("meow")] assert chunks == [ - _any_id_ai_message_chunk(content="", additional_kwargs={"foo": 42}), - _any_id_ai_message_chunk(content="", additional_kwargs={"bar": 24}), + _any_id_ai_message_chunk( + content="", additional_kwargs={"foo": 42, "output_version": "v0"} + ), + _any_id_ai_message_chunk( + content="", additional_kwargs={"bar": 24, "output_version": "v0"} + ), _any_id_ai_message_chunk(content="", chunk_position="last"), ] assert len({chunk.id for chunk in chunks}) == 1 @@ -99,21 +131,30 @@ async def test_generic_fake_chat_model_stream() -> None: assert chunks == [ _any_id_ai_message_chunk( content="", - additional_kwargs={"function_call": {"name": "move_file"}}, + additional_kwargs={ + "function_call": {"name": "move_file"}, + "output_version": "v0", + }, ), _any_id_ai_message_chunk( content="", additional_kwargs={ "function_call": {"arguments": '{\n "source_path": "foo"'}, + "output_version": "v0", }, ), _any_id_ai_message_chunk( - content="", additional_kwargs={"function_call": {"arguments": ","}} + content="", + additional_kwargs={ + "function_call": {"arguments": ","}, + "output_version": "v0", + }, ), _any_id_ai_message_chunk( content="", additional_kwargs={ "function_call": {"arguments": '\n "destination_path": "bar"\n}'}, + "output_version": "v0", }, ), _any_id_ai_message_chunk(content="", chunk_position="last"), @@ -134,7 +175,8 @@ async def test_generic_fake_chat_model_stream() -> None: "name": "move_file", "arguments": '{\n "source_path": "foo",\n "' 'destination_path": "bar"\n}', - } + }, + "output_version": "v0", }, id=chunks[0].id, chunk_position="last", @@ -150,9 +192,17 @@ async def 
test_generic_fake_chat_model_astream_log() -> None: ] final = log_patches[-1] assert final.state["streamed_output"] == [ - _any_id_ai_message_chunk(content="hello"), - _any_id_ai_message_chunk(content=" "), - _any_id_ai_message_chunk(content="goodbye", chunk_position="last"), + _any_id_ai_message_chunk( + content="hello", additional_kwargs={"output_version": "v0"} + ), + _any_id_ai_message_chunk( + content=" ", additional_kwargs={"output_version": "v0"} + ), + _any_id_ai_message_chunk( + content="goodbye", + chunk_position="last", + additional_kwargs={"output_version": "v0"}, + ), ] assert len({chunk.id for chunk in final.state["streamed_output"]}) == 1 @@ -207,9 +257,17 @@ async def on_llm_new_token( ) ] assert results == [ - _any_id_ai_message_chunk(content="hello"), - _any_id_ai_message_chunk(content=" "), - _any_id_ai_message_chunk(content="goodbye", chunk_position="last"), + _any_id_ai_message_chunk( + content="hello", additional_kwargs={"output_version": "v0"} + ), + _any_id_ai_message_chunk( + content=" ", additional_kwargs={"output_version": "v0"} + ), + _any_id_ai_message_chunk( + content="goodbye", + chunk_position="last", + additional_kwargs={"output_version": "v0"}, + ), ] assert tokens == ["hello", " ", "goodbye"] assert len({chunk.id for chunk in results}) == 1 @@ -221,17 +279,19 @@ def test_chat_model_inputs() -> None: assert cast("HumanMessage", fake.invoke("hello")) == _any_id_human_message( content="hello" ) - assert fake.invoke([("ai", "blah")]) == _any_id_ai_message(content="blah") + assert fake.invoke([("ai", "blah")]) == _any_id_ai_message( + content="blah", additional_kwargs={"output_version": "v0"} + ) assert fake.invoke([AIMessage(content="blah")]) == _any_id_ai_message( - content="blah" + content="blah", additional_kwargs={"output_version": "v0"} ) def test_fake_list_chat_model_batch() -> None: expected = [ - _any_id_ai_message(content="a"), - _any_id_ai_message(content="b"), - _any_id_ai_message(content="c"), + _any_id_ai_message(content="a", additional_kwargs={"output_version": "v0"}), + _any_id_ai_message(content="b", additional_kwargs={"output_version": "v0"}), + _any_id_ai_message(content="c", additional_kwargs={"output_version": "v0"}), ] for _ in range(20): # run this 20 times to test race condition in batch diff --git a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py index 1176d683f1130..5d1e775b81722 100644 --- a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py +++ b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py @@ -7,7 +7,10 @@ import pytest from typing_extensions import override -from langchain_core.callbacks import CallbackManagerForLLMRun +from langchain_core.callbacks import ( + AsyncCallbackManagerForLLMRun, + CallbackManagerForLLMRun, +) from langchain_core.language_models import ( BaseChatModel, FakeListChatModel, @@ -185,6 +188,8 @@ def _generate( messages: list[BaseMessage], stop: Optional[list[str]] = None, run_manager: Optional[CallbackManagerForLLMRun] = None, + *, + output_version: str = "v0", **kwargs: Any, ) -> ChatResult: """Top Level call.""" @@ -203,10 +208,14 @@ def _llm_type(self) -> str: # is not strictly correct. # LangChain documents a pattern of adding BaseMessageChunks to accumulate a stream. # This may be better done with `reduce(operator.add, chunks)`. 
- assert chunks == [_any_id_ai_message(content="hello")] + assert chunks == [ + _any_id_ai_message(content="hello", additional_kwargs={"output_version": "v0"}) + ] chunks = [chunk async for chunk in model.astream("anything")] - assert chunks == [_any_id_ai_message(content="hello")] + assert chunks == [ + _any_id_ai_message(content="hello", additional_kwargs={"output_version": "v0"}) + ] async def test_astream_implementation_fallback_to_stream() -> None: @@ -218,6 +227,8 @@ def _generate( messages: list[BaseMessage], stop: Optional[list[str]] = None, run_manager: Optional[CallbackManagerForLLMRun] = None, + *, + output_version: str = "v0", **kwargs: Any, ) -> ChatResult: """Top Level call.""" @@ -229,6 +240,8 @@ def _stream( messages: list[BaseMessage], stop: Optional[list[str]] = None, run_manager: Optional[CallbackManagerForLLMRun] = None, + *, + output_version: str = "v0", **kwargs: Any, ) -> Iterator[ChatGenerationChunk]: """Stream the output of the model.""" @@ -245,18 +258,26 @@ def _llm_type(self) -> str: chunks = list(model.stream("anything")) assert chunks == [ _any_id_ai_message_chunk( - content="a", + content="a", additional_kwargs={"output_version": "v0"} + ), + _any_id_ai_message_chunk( + content="b", + chunk_position="last", + additional_kwargs={"output_version": "v0"}, ), - _any_id_ai_message_chunk(content="b", chunk_position="last"), ] assert len({chunk.id for chunk in chunks}) == 1 assert type(model)._astream == BaseChatModel._astream astream_chunks = [chunk async for chunk in model.astream("anything")] assert astream_chunks == [ _any_id_ai_message_chunk( - content="a", + content="a", additional_kwargs={"output_version": "v0"} + ), + _any_id_ai_message_chunk( + content="b", + chunk_position="last", + additional_kwargs={"output_version": "v0"}, ), - _any_id_ai_message_chunk(content="b", chunk_position="last"), ] assert len({chunk.id for chunk in astream_chunks}) == 1 @@ -270,6 +291,8 @@ def _generate( messages: list[BaseMessage], stop: Optional[list[str]] = None, run_manager: Optional[CallbackManagerForLLMRun] = None, + *, + output_version: str = "v0", **kwargs: Any, ) -> ChatResult: """Top Level call.""" @@ -280,7 +303,9 @@ async def _astream( self, messages: list[BaseMessage], stop: Optional[list[str]] = None, - run_manager: Optional[CallbackManagerForLLMRun] = None, # type: ignore[override] + run_manager: Optional[AsyncCallbackManagerForLLMRun] = None, + *, + output_version: Optional[str] = "v0", **kwargs: Any, ) -> AsyncIterator[ChatGenerationChunk]: """Stream the output of the model.""" @@ -297,9 +322,13 @@ def _llm_type(self) -> str: chunks = [chunk async for chunk in model.astream("anything")] assert chunks == [ _any_id_ai_message_chunk( - content="a", + content="a", additional_kwargs={"output_version": "v0"} + ), + _any_id_ai_message_chunk( + content="b", + chunk_position="last", + additional_kwargs={"output_version": "v0"}, ), - _any_id_ai_message_chunk(content="b", chunk_position="last"), ] assert len({chunk.id for chunk in chunks}) == 1 diff --git a/libs/core/tests/unit_tests/language_models/chat_models/test_output_version.py b/libs/core/tests/unit_tests/language_models/chat_models/test_output_version.py new file mode 100644 index 0000000000000..daa4c2e822093 --- /dev/null +++ b/libs/core/tests/unit_tests/language_models/chat_models/test_output_version.py @@ -0,0 +1,341 @@ +"""Test output_version functionality in BaseChatModel.""" + +from collections.abc import AsyncIterator, Iterator +from typing import Any, Optional +from unittest.mock import patch + +import 
pytest +from pydantic import ConfigDict +from typing_extensions import override + +from langchain_core.callbacks import CallbackManagerForLLMRun +from langchain_core.language_models import BaseChatModel +from langchain_core.language_models.fake_chat_models import GenericFakeChatModel +from langchain_core.messages import AIMessage, AIMessageChunk, BaseMessage, HumanMessage +from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult + + +class OutputVersionTrackingChatModel(GenericFakeChatModel): + """Chat model that tracks output_version parameter for testing.""" + + model_config = ConfigDict(extra="allow") + last_output_version: Optional[str] = None + + @override + def _generate( + self, + messages: list[BaseMessage], + stop: Optional[list[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + *, + output_version: str = "v0", + **kwargs: Any, + ) -> ChatResult: + """Store the output_version that was passed.""" + self.last_output_version = output_version + message = AIMessage(content="test response") + generation = ChatGeneration(message=message) + return ChatResult(generations=[generation]) + + @override + def _stream( + self, + messages: list[BaseMessage], + stop: Optional[list[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + *, + output_version: str = "v0", + **kwargs: Any, + ) -> Iterator[ChatGenerationChunk]: + """Store the output_version that was passed.""" + self.last_output_version = output_version + yield ChatGenerationChunk(message=AIMessageChunk(content="test")) + yield ChatGenerationChunk(message=AIMessageChunk(content=" stream")) + + @override + async def _astream( + self, + messages: list[BaseMessage], + stop: Optional[list[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, # type: ignore[override] + *, + output_version: str = "v0", + **kwargs: Any, + ) -> AsyncIterator[ChatGenerationChunk]: + """Store the output_version that was passed.""" + self.last_output_version = output_version + yield ChatGenerationChunk(message=AIMessageChunk(content="async")) + yield ChatGenerationChunk(message=AIMessageChunk(content=" stream")) + + +@pytest.fixture +def messages() -> list[BaseMessage]: + return [HumanMessage("Hello")] + + +class TestOutputVersionPassing: + """Test that output_version parameter is correctly passed to model methods.""" + + @pytest.mark.parametrize( + ("method_name", "default_version", "provided_version", "expected_version"), + [ + # Test invoke + ("invoke", "v1", None, "v1"), # Uses default when not provided + ("invoke", "v0", "v1", "v1"), # Uses provided version + # Test stream + ("stream", "v1", None, "v1"), # Uses default when not provided + ("stream", "v1", "v2", "v2"), # Uses provided version + ], + ) + def test_sync_methods_output_version( + self, + messages: list[BaseMessage], + method_name: str, + default_version: str, + provided_version: Optional[str], + expected_version: str, + ) -> None: + """Test sync methods handle output_version correctly.""" + model = OutputVersionTrackingChatModel( + messages=iter(["test response"]), output_version=default_version + ) + method = getattr(model, method_name) + + if provided_version is not None: + if method_name == "stream": + list(method(messages, output_version=provided_version)) + else: + method(messages, output_version=provided_version) + elif method_name == "stream": + list(method(messages)) + else: + method(messages) + + assert model.last_output_version == expected_version + + @pytest.mark.parametrize( + ("method_name", 
"default_version", "provided_version", "expected_version"), + [ + # Test ainvoke + ("ainvoke", "v1", None, "v1"), # Uses default when not provided + ("ainvoke", "v0", "v1", "v1"), # Uses provided version + # Test astream + ("astream", "v1", None, "v1"), # Uses default when not provided + ("astream", "v1", "v0", "v0"), # Uses provided version + ], + ) + async def test_async_methods_output_version( + self, + messages: list[BaseMessage], + method_name: str, + default_version: str, + provided_version: Optional[str], + expected_version: str, + ) -> None: + """Test async methods handle output_version correctly.""" + model = OutputVersionTrackingChatModel( + messages=iter(["test response"]), output_version=default_version + ) + method = getattr(model, method_name) + + if provided_version is not None: + if method_name == "astream": + async for _ in method(messages, output_version=provided_version): + pass + else: + await method(messages, output_version=provided_version) + elif method_name == "astream": + async for _ in method(messages): + pass + else: + await method(messages) + + assert model.last_output_version == expected_version + + +class TestStreamFallback: + """Test stream fallback behavior with output_version.""" + + def test_stream_fallback_to_invoke_passes_output_version( + self, + messages: list[BaseMessage], + ) -> None: + """Test `stream()` fallback passes `output_version` correctly.""" + + class NoStreamModel(BaseChatModel): + model_config = ConfigDict(extra="allow") + last_output_version: Optional[str] = None + + @override + def _generate( + self, + messages: list[BaseMessage], + stop: Optional[list[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + *, + output_version: str = "v0", + **kwargs: Any, + ) -> ChatResult: + self.last_output_version = output_version + message = AIMessage(content="test response") + generation = ChatGeneration(message=message) + return ChatResult(generations=[generation]) + + @property + def _llm_type(self) -> str: + return "no-stream-model" + + model = NoStreamModel(output_version="v1") + # Stream should fallback to invoke and pass the output_version + list(model.stream(messages, output_version="v2")) + assert model.last_output_version == "v2" + + async def test_astream_fallback_to_ainvoke_passes_output_version( + self, + messages: list[BaseMessage], + ) -> None: + """Test `astream()` fallback passes `output_version` correctly.""" + + class NoStreamModel(BaseChatModel): + model_config = ConfigDict(extra="allow") + last_output_version: Optional[str] = None + + @override + def _generate( + self, + messages: list[BaseMessage], + stop: Optional[list[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + *, + output_version: str = "v0", + **kwargs: Any, + ) -> ChatResult: + self.last_output_version = output_version + message = AIMessage(content="test response") + generation = ChatGeneration(message=message) + return ChatResult(generations=[generation]) + + @property + def _llm_type(self) -> str: + return "no-stream-model" + + model = NoStreamModel(output_version="v1") + # astream should fallback to ainvoke and pass the output_version + async for _ in model.astream(messages, output_version="v2"): + pass + assert model.last_output_version == "v2" + + +class TestOutputVersionInMessages: + """Test output_version is added to message additional_kwargs.""" + + def test_output_version_added_to_message_additional_kwargs( + self, + messages: list[BaseMessage], + ) -> None: + """Test that output_version is added to message 
additional_kwargs.""" + model = OutputVersionTrackingChatModel( + messages=iter(["test response"]), output_version="v1" + ) + result = model.invoke(messages, output_version="v2") + assert result.additional_kwargs["output_version"] == "v2" + + def test_output_version_added_to_stream_message_additional_kwargs( + self, + messages: list[BaseMessage], + ) -> None: + """Test that output_version is added to streamed message additional_kwargs.""" + model = OutputVersionTrackingChatModel( + messages=iter(["test response"]), output_version="v1" + ) + chunks = list(model.stream(messages, output_version="v2")) + + # Check that content chunks (not the "last" chunk) have the output_version + content_chunks = [chunk for chunk in chunks if chunk.content] + assert len(content_chunks) >= 1 # Should have at least one content chunk + + for chunk in content_chunks: + assert "output_version" in chunk.additional_kwargs + assert chunk.additional_kwargs["output_version"] == "v2" + + async def test_output_version_added_to_astream_message_additional_kwargs( + self, + messages: list[BaseMessage], + ) -> None: + """Test output_version added to async streamed additional_kwargs.""" + model = OutputVersionTrackingChatModel( + messages=iter(["test response"]), output_version="v1" + ) + chunks = [chunk async for chunk in model.astream(messages, output_version="v2")] + + # Check that content chunks (not the "last" chunk) have the output_version + content_chunks = [chunk for chunk in chunks if chunk.content] + assert len(content_chunks) >= 1 # Should have at least one content chunk + + for chunk in content_chunks: + assert "output_version" in chunk.additional_kwargs + assert chunk.additional_kwargs["output_version"] == "v2" + + +class TestOutputVersionMerging: + """Test output_version handling in merge operations.""" + + def test_output_version_consistency_in_merge(self) -> None: + """Test that merge_dicts raises error for inconsistent output_version.""" + from langchain_core.utils._merge import merge_dicts + + left_dict = {"output_version": "v1"} + right_dict = {"output_version": "v2"} + + with pytest.raises(ValueError, match="Unable to merge.*output_version"): + merge_dicts(left_dict, right_dict) + + def test_output_version_merge_same_value(self) -> None: + """Test that merge_dicts works fine when output_version values are same.""" + from langchain_core.utils._merge import merge_dicts + + left_dict = {"output_version": "v1", "other": "data1"} + right_dict = {"output_version": "v1", "more": "data2"} + + result = merge_dicts(left_dict, right_dict) + assert result["output_version"] == "v1" + assert result["other"] == "data1" + assert result["more"] == "data2" + + +class TestBackwardsCompatibility: + """Test backwards compatibility features.""" + + def test_backwards_compatibility_with_v0_default( + self, + messages: list[BaseMessage], + ) -> None: + """Test that models default to v0 for backward compatibility.""" + model = OutputVersionTrackingChatModel( + messages=iter(["test response"]) + ) # Don't specify output_version + model.invoke(messages) + # The default should be v0 for backward compatibility + assert model.last_output_version == "v0" + + def test_output_version_preserved_through_chain_calls( + self, + messages: list[BaseMessage], + ) -> None: + """Test that output_version is preserved through internal method calls.""" + model = OutputVersionTrackingChatModel( + messages=iter(["test response"]), output_version="v1" + ) + + # Test both with explicit and implicit (None) output_version + with patch.object( + model, 
"_generate_with_cache", wraps=model._generate_with_cache + ) as mock_cache: + model.invoke(messages, output_version="v2") + # Verify the internal call received the right output_version + mock_cache.assert_called_once() + call_kwargs = mock_cache.call_args[1] + assert call_kwargs.get("_output_version") == "v2" + + # Verify the model implementation received the correct output_version + assert model.last_output_version == "v2" diff --git a/libs/core/tests/unit_tests/language_models/chat_models/test_rate_limiting.py b/libs/core/tests/unit_tests/language_models/chat_models/test_rate_limiting.py index 0411915c26925..bf8cb1ab3ddf0 100644 --- a/libs/core/tests/unit_tests/language_models/chat_models/test_rate_limiting.py +++ b/libs/core/tests/unit_tests/language_models/chat_models/test_rate_limiting.py @@ -216,7 +216,8 @@ def test_rate_limit_skips_cache() -> None: '[{"lc": 1, "type": "constructor", "id": ["langchain", "schema", ' '"messages", "HumanMessage"], "kwargs": {"content": "foo", ' '"type": "human"}}]', - "[('_type', 'generic-fake-chat-model'), ('stop', None)]", + "[('_output_version', 'v0'), ('_type', 'generic-fake-chat-model'), " + "('stop', None)]", ) ] diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr index b5348ce867d02..6ba099f130a95 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr @@ -483,273 +483,6 @@ RunTree(id=00000000-0000-4000-8000-000000000000, name='RunnableSequence', run_type='chain', dotted_order='20230101T000000000000Z00000000-0000-4000-8000-000000000000'), ]) # --- -# name: test_configurable_fields[schema2] - dict({ - '$defs': dict({ - 'Configurable': dict({ - 'properties': dict({ - 'llm_responses': dict({ - 'default': list([ - 'a', - ]), - 'description': 'A list of fake responses for this LLM', - 'items': dict({ - 'type': 'string', - }), - 'title': 'LLM Responses', - 'type': 'array', - }), - }), - 'title': 'Configurable', - 'type': 'object', - }), - }), - 'properties': dict({ - 'configurable': dict({ - '$ref': '#/$defs/Configurable', - }), - }), - 'title': 'RunnableConfigurableFieldsConfig', - 'type': 'object', - }) -# --- -# name: test_configurable_fields[schema3] - dict({ - '$defs': dict({ - 'Configurable': dict({ - 'properties': dict({ - 'prompt_template': dict({ - 'default': 'Hello, {name}!', - 'description': 'The prompt template for this chain', - 'title': 'Prompt Template', - 'type': 'string', - }), - }), - 'title': 'Configurable', - 'type': 'object', - }), - }), - 'properties': dict({ - 'configurable': dict({ - '$ref': '#/$defs/Configurable', - }), - }), - 'title': 'RunnableConfigurableFieldsConfig', - 'type': 'object', - }) -# --- -# name: test_configurable_fields[schema4] - dict({ - '$defs': dict({ - 'Configurable': dict({ - 'properties': dict({ - 'llm_responses': dict({ - 'default': list([ - 'a', - ]), - 'description': 'A list of fake responses for this LLM', - 'items': dict({ - 'type': 'string', - }), - 'title': 'LLM Responses', - 'type': 'array', - }), - 'prompt_template': dict({ - 'default': 'Hello, {name}!', - 'description': 'The prompt template for this chain', - 'title': 'Prompt Template', - 'type': 'string', - }), - }), - 'title': 'Configurable', - 'type': 'object', - }), - }), - 'properties': dict({ - 'configurable': dict({ - '$ref': '#/$defs/Configurable', - }), - }), - 'title': 'RunnableSequenceConfig', - 'type': 'object', - }) -# --- -# 
name: test_configurable_fields[schema5] - dict({ - '$defs': dict({ - 'Configurable': dict({ - 'properties': dict({ - 'llm_responses': dict({ - 'default': list([ - 'a', - ]), - 'description': 'A list of fake responses for this LLM', - 'items': dict({ - 'type': 'string', - }), - 'title': 'LLM Responses', - 'type': 'array', - }), - 'other_responses': dict({ - 'default': list([ - 'a', - ]), - 'items': dict({ - 'type': 'string', - }), - 'title': 'Other Responses', - 'type': 'array', - }), - 'prompt_template': dict({ - 'default': 'Hello, {name}!', - 'description': 'The prompt template for this chain', - 'title': 'Prompt Template', - 'type': 'string', - }), - }), - 'title': 'Configurable', - 'type': 'object', - }), - }), - 'properties': dict({ - 'configurable': dict({ - '$ref': '#/$defs/Configurable', - }), - }), - 'title': 'RunnableSequenceConfig', - 'type': 'object', - }) -# --- -# name: test_configurable_fields_example[schema7] - dict({ - '$defs': dict({ - 'Chat_Responses': dict({ - 'title': 'Chat Responses', - }), - 'Configurable': dict({ - 'properties': dict({ - 'chat_responses': dict({ - 'default': list([ - 'hello', - 'bye', - ]), - 'items': dict({ - '$ref': '#/$defs/Chat_Responses', - }), - 'title': 'Chat Responses', - 'type': 'array', - }), - 'llm': dict({ - '$ref': '#/$defs/LLM', - 'default': 'default', - }), - 'llm_responses': dict({ - 'default': list([ - 'a', - ]), - 'description': 'A list of fake responses for this LLM', - 'items': dict({ - 'type': 'string', - }), - 'title': 'LLM Responses', - 'type': 'array', - }), - 'prompt_template': dict({ - '$ref': '#/$defs/Prompt_Template', - 'default': 'hello', - 'description': 'The prompt template for this chain', - }), - }), - 'title': 'Configurable', - 'type': 'object', - }), - 'LLM': dict({ - 'title': 'LLM', - }), - 'Prompt_Template': dict({ - 'title': 'Prompt Template', - }), - }), - 'properties': dict({ - 'configurable': dict({ - '$ref': '#/$defs/Configurable', - }), - }), - 'title': 'RunnableSequenceConfig', - 'type': 'object', - }) -# --- -# name: test_configurable_fields_prefix_keys[schema6] - dict({ - 'definitions': dict({ - 'Chat_Responses': dict({ - 'title': 'Chat Responses', - }), - 'Configurable': dict({ - 'properties': dict({ - 'chat_sleep': dict({ - 'anyOf': list([ - dict({ - 'type': 'number', - }), - dict({ - 'type': 'null', - }), - ]), - 'default': None, - 'title': 'Chat Sleep', - }), - 'llm': dict({ - '$ref': '#/definitions/LLM', - 'default': 'default', - }), - 'llm==chat/responses': dict({ - 'default': list([ - 'hello', - 'bye', - ]), - 'items': dict({ - '$ref': '#/definitions/Chat_Responses', - }), - 'title': 'Chat Responses', - 'type': 'array', - }), - 'llm==default/responses': dict({ - 'default': list([ - 'a', - ]), - 'description': 'A list of fake responses for this LLM', - 'items': dict({ - 'type': 'string', - }), - 'title': 'LLM Responses', - 'type': 'array', - }), - 'prompt_template': dict({ - '$ref': '#/definitions/Prompt_Template', - 'default': 'hello', - 'description': 'The prompt template for this chain', - }), - }), - 'title': 'Configurable', - 'type': 'object', - }), - 'LLM': dict({ - 'title': 'LLM', - }), - 'Prompt_Template': dict({ - 'title': 'Prompt Template', - }), - }), - 'properties': dict({ - 'configurable': dict({ - '$ref': '#/definitions/Configurable', - }), - }), - 'title': 'RunnableSequenceConfig', - 'type': 'object', - }) -# --- # name: test_each ''' { @@ -14154,178 +13887,3 @@ } ''' # --- -# name: test_seq_prompt_map - ''' - { - "lc": 1, - "type": "constructor", - "id": [ - "langchain", - "schema", 
- "runnable", - "RunnableSequence" - ], - "kwargs": { - "first": { - "lc": 1, - "type": "constructor", - "id": [ - "langchain", - "prompts", - "chat", - "ChatPromptTemplate" - ], - "kwargs": { - "input_variables": [ - "question" - ], - "messages": [ - { - "lc": 1, - "type": "constructor", - "id": [ - "langchain", - "prompts", - "chat", - "SystemMessagePromptTemplate" - ], - "kwargs": { - "prompt": { - "lc": 1, - "type": "constructor", - "id": [ - "langchain", - "prompts", - "prompt", - "PromptTemplate" - ], - "kwargs": { - "input_variables": [], - "template": "You are a nice assistant.", - "template_format": "f-string" - }, - "name": "PromptTemplate" - } - } - }, - { - "lc": 1, - "type": "constructor", - "id": [ - "langchain", - "prompts", - "chat", - "HumanMessagePromptTemplate" - ], - "kwargs": { - "prompt": { - "lc": 1, - "type": "constructor", - "id": [ - "langchain", - "prompts", - "prompt", - "PromptTemplate" - ], - "kwargs": { - "input_variables": [ - "question" - ], - "template": "{question}", - "template_format": "f-string" - }, - "name": "PromptTemplate" - } - } - } - ] - }, - "name": "ChatPromptTemplate" - }, - "middle": [ - { - "lc": 1, - "type": "not_implemented", - "id": [ - "langchain_core", - "runnables", - "base", - "RunnableLambda" - ], - "repr": "RunnableLambda(...)" - } - ], - "last": { - "lc": 1, - "type": "constructor", - "id": [ - "langchain", - "schema", - "runnable", - "RunnableParallel" - ], - "kwargs": { - "steps__": { - "chat": { - "lc": 1, - "type": "constructor", - "id": [ - "langchain", - "schema", - "runnable", - "RunnableBinding" - ], - "kwargs": { - "bound": { - "lc": 1, - "type": "not_implemented", - "id": [ - "langchain_core", - "language_models", - "fake_chat_models", - "FakeListChatModel" - ], - "repr": "FakeListChatModel(output_version='v0', responses=[\"i'm a chatbot\"])", - "name": "FakeListChatModel" - }, - "kwargs": { - "stop": [ - "Thought:" - ] - }, - "config": {} - }, - "name": "FakeListChatModel" - }, - "llm": { - "lc": 1, - "type": "not_implemented", - "id": [ - "langchain_core", - "language_models", - "fake", - "FakeListLLM" - ], - "repr": "FakeListLLM(responses=[\"i'm a textbot\"])", - "name": "FakeListLLM" - }, - "passthrough": { - "lc": 1, - "type": "not_implemented", - "id": [ - "langchain_core", - "runnables", - "base", - "RunnableLambda" - ], - "repr": "RunnableLambda(...)" - } - } - }, - "name": "RunnableParallel" - } - }, - "name": "RunnableSequence" - } - ''' -# --- diff --git a/libs/core/tests/unit_tests/runnables/test_runnable.py b/libs/core/tests/unit_tests/runnables/test_runnable.py index 65b6ea5d4f9bd..93b8b206dcada 100644 --- a/libs/core/tests/unit_tests/runnables/test_runnable.py +++ b/libs/core/tests/unit_tests/runnables/test_runnable.py @@ -1862,7 +1862,11 @@ def test_prompt_with_chat_model( ] == [ _any_id_ai_message_chunk(content="f"), _any_id_ai_message_chunk(content="o"), - _any_id_ai_message_chunk(content="o", chunk_position="last"), + _any_id_ai_message_chunk( + content="o", + chunk_position="last", + additional_kwargs={"output_version": "v0"}, + ), ] assert prompt_spy.call_args.args[1] == {"question": "What is your name?"} assert chat_spy.call_args.args[1] == ChatPromptValue( @@ -1971,7 +1975,11 @@ async def test_prompt_with_chat_model_async( ] == [ _any_id_ai_message_chunk(content="f"), _any_id_ai_message_chunk(content="o"), - _any_id_ai_message_chunk(content="o", chunk_position="last"), + _any_id_ai_message_chunk( + content="o", + chunk_position="last", + additional_kwargs={"output_version": "v0"}, + ), ] assert 
prompt_spy.call_args.args[1] == {"question": "What is your name?"} assert chat_spy.call_args.args[1] == ChatPromptValue( diff --git a/libs/core/tests/unit_tests/runnables/test_runnable_events_v1.py b/libs/core/tests/unit_tests/runnables/test_runnable_events_v1.py index 2dc16821f2b53..fc83ecbfa8af1 100644 --- a/libs/core/tests/unit_tests/runnables/test_runnable_events_v1.py +++ b/libs/core/tests/unit_tests/runnables/test_runnable_events_v1.py @@ -1887,7 +1887,9 @@ async def test_events_astream_config() -> None: ) model_02 = model.with_config({"configurable": {"messages": good_world_on_repeat}}) - assert model_02.invoke("hello") == AIMessage(content="Goodbye world", id="ai2") + assert model_02.invoke("hello") == AIMessage( + content="Goodbye world", additional_kwargs={"output_version": "v0"}, id="ai2" + ) events = await _collect_events(model_02.astream_events("hello", version="v1")) _assert_events_equal_allow_superset_metadata( @@ -2013,7 +2015,9 @@ def get_by_session_id(session_id: str) -> BaseChatMessageHistory: assert store == { "session-123": [ HumanMessage(content="hello"), - AIMessage(content="hello", id="ai3"), + AIMessage( + content="hello", additional_kwargs={"output_version": "v0"}, id="ai3" + ), ] } diff --git a/libs/core/tests/unit_tests/runnables/test_runnable_events_v2.py b/libs/core/tests/unit_tests/runnables/test_runnable_events_v2.py index 536da3665ed9e..784c1548b788a 100644 --- a/libs/core/tests/unit_tests/runnables/test_runnable_events_v2.py +++ b/libs/core/tests/unit_tests/runnables/test_runnable_events_v2.py @@ -1839,7 +1839,9 @@ async def test_events_astream_config() -> None: ) model_02 = model.with_config({"configurable": {"messages": good_world_on_repeat}}) - assert model_02.invoke("hello") == AIMessage(content="Goodbye world", id="ai2") + assert model_02.invoke("hello") == AIMessage( + content="Goodbye world", additional_kwargs={"output_version": "v0"}, id="ai2" + ) events = await _collect_events(model_02.astream_events("hello", version="v2")) _assert_events_equal_allow_superset_metadata( @@ -1984,7 +1986,9 @@ def _get_output_messages(*args: Any, **kwargs: Any) -> Any: assert store == { "session-123": [ HumanMessage(content="hello"), - AIMessage(content="hello", id="ai3"), + AIMessage( + content="hello", additional_kwargs={"output_version": "v0"}, id="ai3" + ), ] } From f1b676c0ee0736503b9ca3b48aab3c6fcdd39df4 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Tue, 26 Aug 2025 13:54:14 -0400 Subject: [PATCH 52/73] . 
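
Only stamp output_version onto chunks that actually carry content, so synthetic empty
"last" chunks stay unmarked. A minimal caller-side sketch of the intended behavior on
this branch (FakeListChatModel is used purely for illustration, not as the only model
affected):

    from langchain_core.language_models.fake_chat_models import FakeListChatModel

    model = FakeListChatModel(responses=["foo"])
    chunks = list(model.stream("hi", output_version="v0"))
    # Every content-bearing chunk is marked with the effective version...
    assert all(
        c.additional_kwargs.get("output_version") == "v0"
        for c in chunks
        if c.content
    )
    # ...while an empty synthetic "last" chunk, if one is emitted, is left unmarked.
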
--- .../language_models/chat_models.py | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py index c139f63aac18f..e96992a77b482 100644 --- a/libs/core/langchain_core/language_models/chat_models.py +++ b/libs/core/langchain_core/language_models/chat_models.py @@ -632,7 +632,11 @@ def stream( chunk.message.id = run_id chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk) output_version = kwargs["_output_version"] - if isinstance(chunk.message, (AIMessage, AIMessageChunk)): + if isinstance(chunk.message, (AIMessage, AIMessageChunk)) and ( + not isinstance(chunk.message, AIMessageChunk) + or chunk.message.chunk_position != "last" + or chunk.message.content # Include last chunks with content + ): chunk.message.additional_kwargs["output_version"] = ( output_version ) @@ -781,7 +785,11 @@ async def astream( chunk.message.id = run_id chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk) output_version = kwargs["_output_version"] - if isinstance(chunk.message, (AIMessage, AIMessageChunk)): + if isinstance(chunk.message, (AIMessage, AIMessageChunk)) and ( + not isinstance(chunk.message, AIMessageChunk) + or chunk.message.chunk_position != "last" + or chunk.message.content # Include last chunks with content + ): chunk.message.additional_kwargs["output_version"] = output_version if output_version == "v1": # Overwrite .content with .content_blocks @@ -1268,7 +1276,11 @@ def _generate_with_cache( messages, stop=stop, output_version=output_version, **kwargs ): chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk) - if isinstance(chunk.message, (AIMessage, AIMessageChunk)): + if isinstance(chunk.message, (AIMessage, AIMessageChunk)) and ( + not isinstance(chunk.message, AIMessageChunk) + or chunk.message.chunk_position != "last" + or chunk.message.content # Include last chunks with content + ): chunk.message.additional_kwargs["output_version"] = output_version if run_manager: if chunk.message.id is None: @@ -1389,7 +1401,11 @@ async def _agenerate_with_cache( messages, stop=stop, output_version=output_version, **kwargs ): chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk) - if isinstance(chunk.message, (AIMessage, AIMessageChunk)): + if isinstance(chunk.message, (AIMessage, AIMessageChunk)) and ( + not isinstance(chunk.message, AIMessageChunk) + or chunk.message.chunk_position != "last" + or chunk.message.content # Include last chunks with content + ): chunk.message.additional_kwargs["output_version"] = output_version if run_manager: if chunk.message.id is None: From 706ea1b7577a5fca26f44c7b969736e7ebb8ad5a Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Tue, 26 Aug 2025 13:54:22 -0400 Subject: [PATCH 53/73] . 
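
Teach the unit-test stubs about the default version marker. Roughly, the updated
helpers behave as follows (these are private helpers in
libs/core/tests/unit_tests/stubs.py, shown only for illustration):

    msg = _any_id_ai_message(content="hello")
    assert msg.additional_kwargs["output_version"] == "v0"

    # Synthetic empty "last" chunks are deliberately left without the marker,
    # mirroring the chat model behavior from the previous commit.
    chunk = _any_id_ai_message_chunk(content="", chunk_position="last")
    assert "output_version" not in chunk.additional_kwargs
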
--- libs/core/tests/unit_tests/stubs.py | 34 +++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/libs/core/tests/unit_tests/stubs.py b/libs/core/tests/unit_tests/stubs.py index 5cd45afb41f48..332e8b9fe8723 100644 --- a/libs/core/tests/unit_tests/stubs.py +++ b/libs/core/tests/unit_tests/stubs.py @@ -29,6 +29,18 @@ def _any_id_document(**kwargs: Any) -> Document: def _any_id_ai_message(**kwargs: Any) -> AIMessage: """Create ai message with an any id field.""" + # Set default additional_kwargs to include output_version if not provided + if "additional_kwargs" not in kwargs: + kwargs["additional_kwargs"] = {"output_version": "v0"} + elif ( + isinstance(kwargs["additional_kwargs"], dict) + and "output_version" not in kwargs["additional_kwargs"] + ): + kwargs["additional_kwargs"] = { + **kwargs["additional_kwargs"], + "output_version": "v0", + } + message = AIMessage(**kwargs) message.id = AnyStr() return message @@ -36,6 +48,28 @@ def _any_id_ai_message(**kwargs: Any) -> AIMessage: def _any_id_ai_message_chunk(**kwargs: Any) -> AIMessageChunk: """Create ai message with an any id field.""" + # Only exclude output_version from last chunks that have empty content + # (synthetic chunks) + is_empty_last_chunk = ( + kwargs.get("chunk_position") == "last" + and not kwargs.get("content") + and "additional_kwargs" not in kwargs + ) + + # Set default additional_kwargs to include output_version if not provided and not + # an empty last chunk + if not is_empty_last_chunk: + if "additional_kwargs" not in kwargs: + kwargs["additional_kwargs"] = {"output_version": "v0"} + elif ( + isinstance(kwargs["additional_kwargs"], dict) + and "output_version" not in kwargs["additional_kwargs"] + ): + kwargs["additional_kwargs"] = { + **kwargs["additional_kwargs"], + "output_version": "v0", + } + message = AIMessageChunk(**kwargs) message.id = AnyStr() return message From 659d282d1d4d7e2f197f107d3b54680d8c880314 Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Tue, 26 Aug 2025 13:55:44 -0400 Subject: [PATCH 54/73] standard tests: update multimodal tests --- .../integration_tests/chat_models.py | 39 ++++++------------- 1 file changed, 12 insertions(+), 27 deletions(-) diff --git a/libs/standard-tests/langchain_tests/integration_tests/chat_models.py b/libs/standard-tests/langchain_tests/integration_tests/chat_models.py index 1750e8bdefdb4..f8024195e3990 100644 --- a/libs/standard-tests/langchain_tests/integration_tests/chat_models.py +++ b/libs/standard-tests/langchain_tests/integration_tests/chat_models.py @@ -315,8 +315,7 @@ def supports_json_mode(self) -> bool: { "type": "image", - "source_type": "base64", - "data": "", + "base64": "", "mime_type": "image/jpeg", # or appropriate mime-type } @@ -351,7 +350,6 @@ def supports_image_inputs(self) -> bool: { "type": "image", - "source_type": "url", "url": "https://...", } @@ -377,8 +375,7 @@ def supports_image_urls(self) -> bool: { "type": "file", - "source_type": "base64", - "data": "", + "base64": "", "mime_type": "application/pdf", } @@ -404,8 +401,7 @@ def supports_pdf_inputs(self) -> bool: { "type": "audio", - "source_type": "base64", - "data": "", + "base64": "", "mime_type": "audio/wav", # or appropriate mime-type } @@ -499,8 +495,7 @@ def supports_anthropic_inputs(self) -> bool: content=[ { "type": "image", - "source_type": "base64", - "data": image_data, + "base64": image_data, "mime_type": "image/jpeg", }, ], @@ -2306,8 +2301,7 @@ def test_pdf_inputs(self, model: BaseChatModel) -> None: { "type": "image", - "source_type": "base64", - 
"data": "", + "base64": "", "mime_type": "application/pdf", } @@ -2347,9 +2341,8 @@ def supports_pdf_inputs(self) -> bool: }, { "type": "file", - "source_type": "base64", "mime_type": "application/pdf", - "data": pdf_data, + "base64": pdf_data, }, ] ) @@ -2383,8 +2376,7 @@ def test_audio_inputs(self, model: BaseChatModel) -> None: { "type": "audio", - "source_type": "base64", - "data": "", + "base64": "", "mime_type": "audio/wav", # or appropriate mime-type } @@ -2424,9 +2416,8 @@ def supports_audio_inputs(self) -> bool: }, { "type": "audio", - "source_type": "base64", "mime_type": "audio/wav", - "data": audio_data, + "base64": audio_data, }, ] ) @@ -2457,8 +2448,7 @@ def test_image_inputs(self, model: BaseChatModel) -> None: { "type": "image", - "source_type": "base64", - "data": "", + "base64": "", "mime_type": "image/jpeg", # or appropriate mime-type } @@ -2484,7 +2474,6 @@ def test_image_inputs(self, model: BaseChatModel) -> None: { "type": "image", - "source_type": "url", "url": "", } @@ -2536,9 +2525,8 @@ def supports_image_urls(self) -> bool: {"type": "text", "text": "describe the weather in this image"}, { "type": "image", - "source_type": "base64", "mime_type": "image/jpeg", - "data": image_data, + "base64": image_data, }, ], ) @@ -2551,7 +2539,6 @@ def supports_image_urls(self) -> bool: {"type": "text", "text": "describe the weather in this image"}, { "type": "image", - "source_type": "url", "url": image_url, }, ], @@ -2586,8 +2573,7 @@ def test_image_tool_message(self, model: BaseChatModel) -> None: content=[ { "type": "image", - "source_type": "base64", - "data": image_data, + "base64": image_data, "mime_type": "image/jpeg", }, ], @@ -2642,8 +2628,7 @@ def supports_image_tool_message(self) -> bool: content=[ { "type": "image", - "source_type": "base64", - "data": image_data, + "base64": image_data, "mime_type": "image/jpeg", }, ], From 19d3a736464376fd7af2850a0c2007ce0f95d44b Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Tue, 26 Aug 2025 14:11:01 -0400 Subject: [PATCH 55/73] . 
--- .../language_models/chat_models.py | 42 ++++++++++++------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py index 553719730f7e6..06c68c340fab1 100644 --- a/libs/core/langchain_core/language_models/chat_models.py +++ b/libs/core/langchain_core/language_models/chat_models.py @@ -440,7 +440,7 @@ def invoke( effective_output_version = ( output_version if output_version is not None else self.output_version ) - kwargs["_output_version"] = effective_output_version + kwargs["_output_version"] = effective_output_version or "v0" return cast( "AIMessage", @@ -487,7 +487,7 @@ async def ainvoke( effective_output_version = ( output_version if output_version is not None else self.output_version ) - kwargs["_output_version"] = effective_output_version + kwargs["_output_version"] = effective_output_version or "v0" llm_result = await self.agenerate_prompt( [self._convert_input(input)], @@ -564,7 +564,7 @@ def stream( effective_output_version = ( output_version if output_version is not None else self.output_version ) - kwargs["_output_version"] = effective_output_version + kwargs["_output_version"] = effective_output_version or "v0" if not self._should_stream(async_api=False, **{**kwargs, "stream": True}): # model doesn't implement streaming, so use default implementation @@ -625,7 +625,7 @@ def stream( for chunk in self._stream( input_messages, stop=stop, - output_version=kwargs["_output_version"], + output_version=kwargs["_output_version"] or "v0", **kwargs, ): if chunk.message.id is None: @@ -638,7 +638,7 @@ def stream( or chunk.message.content # Include last chunks with content ): chunk.message.additional_kwargs["output_version"] = ( - output_version + output_version or "v0" ) if output_version == "v1": # Overwrite .content with .content_blocks @@ -715,7 +715,7 @@ async def astream( effective_output_version = ( output_version if output_version is not None else self.output_version ) - kwargs["_output_version"] = effective_output_version + kwargs["_output_version"] = effective_output_version or "v0" if not self._should_stream(async_api=True, **{**kwargs, "stream": True}): # No async or sync stream is implemented, so fall back to ainvoke @@ -778,7 +778,7 @@ async def astream( async for chunk in self._astream( input_messages, stop=stop, - output_version=kwargs["_output_version"], + output_version=kwargs["_output_version"] or "v0", **kwargs, ): if chunk.message.id is None: @@ -790,7 +790,9 @@ async def astream( or chunk.message.chunk_position != "last" or chunk.message.content # Include last chunks with content ): - chunk.message.additional_kwargs["output_version"] = output_version + chunk.message.additional_kwargs["output_version"] = ( + output_version or "v0" + ) if output_version == "v1": # Overwrite .content with .content_blocks chunk.message = _update_message_content_to_blocks( @@ -1273,7 +1275,7 @@ def _generate_with_cache( ) yielded = False for chunk in self._stream( - messages, stop=stop, output_version=output_version, **kwargs + messages, stop=stop, output_version=output_version or "v0", **kwargs ): chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk) if isinstance(chunk.message, (AIMessage, AIMessageChunk)) and ( @@ -1281,7 +1283,9 @@ def _generate_with_cache( or chunk.message.chunk_position != "last" or chunk.message.content # Include last chunks with content ): - chunk.message.additional_kwargs["output_version"] = output_version + 
chunk.message.additional_kwargs["output_version"] = ( + output_version or "v0" + ) if run_manager: if chunk.message.id is None: chunk.message.id = run_id @@ -1318,12 +1322,12 @@ def _generate_with_cache( messages, stop=stop, run_manager=run_manager, - output_version=output_version, + output_version=output_version or "v0", **kwargs, ) else: result = self._generate( - messages, stop=stop, output_version=output_version, **kwargs + messages, stop=stop, output_version=output_version or "v0", **kwargs ) if output_version == "v1": @@ -1341,7 +1345,9 @@ def _generate_with_cache( generation ) if isinstance(generation.message, (AIMessage, AIMessageChunk)): - generation.message.additional_kwargs["output_version"] = output_version + generation.message.additional_kwargs["output_version"] = ( + output_version or "v0" + ) if len(result.generations) == 1 and result.llm_output is not None: result.generations[0].message.response_metadata = { **result.llm_output, @@ -1398,7 +1404,7 @@ async def _agenerate_with_cache( ) yielded = False async for chunk in self._astream( - messages, stop=stop, output_version=output_version, **kwargs + messages, stop=stop, output_version=output_version or "v0", **kwargs ): chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk) if isinstance(chunk.message, (AIMessage, AIMessageChunk)) and ( @@ -1406,7 +1412,9 @@ async def _agenerate_with_cache( or chunk.message.chunk_position != "last" or chunk.message.content # Include last chunks with content ): - chunk.message.additional_kwargs["output_version"] = output_version + chunk.message.additional_kwargs["output_version"] = ( + output_version or "v0" + ) if run_manager: if chunk.message.id is None: chunk.message.id = run_id @@ -1466,7 +1474,9 @@ async def _agenerate_with_cache( generation ) if isinstance(generation.message, (AIMessage, AIMessageChunk)): - generation.message.additional_kwargs["output_version"] = output_version + generation.message.additional_kwargs["output_version"] = ( + output_version or "v0" + ) if len(result.generations) == 1 and result.llm_output is not None: result.generations[0].message.response_metadata = { **result.llm_output, From 72b2436b4930c65ca28e386693ae736973e85950 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Tue, 26 Aug 2025 14:12:04 -0400 Subject: [PATCH 56/73] ss --- .../__snapshots__/test_runnable.ambr | 720 +++++++----------- 1 file changed, 267 insertions(+), 453 deletions(-) diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr index 57fd2962282ad..910c21479a870 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr @@ -483,6 +483,273 @@ RunTree(id=00000000-0000-4000-8000-000000000000, name='RunnableSequence', run_type='chain', dotted_order='20230101T000000000000Z00000000-0000-4000-8000-000000000000'), ]) # --- +# name: test_configurable_fields[schema2] + dict({ + '$defs': dict({ + 'Configurable': dict({ + 'properties': dict({ + 'llm_responses': dict({ + 'default': list([ + 'a', + ]), + 'description': 'A list of fake responses for this LLM', + 'items': dict({ + 'type': 'string', + }), + 'title': 'LLM Responses', + 'type': 'array', + }), + }), + 'title': 'Configurable', + 'type': 'object', + }), + }), + 'properties': dict({ + 'configurable': dict({ + '$ref': '#/$defs/Configurable', + }), + }), + 'title': 'RunnableConfigurableFieldsConfig', + 'type': 'object', + }) +# --- +# 
name: test_configurable_fields[schema3] + dict({ + '$defs': dict({ + 'Configurable': dict({ + 'properties': dict({ + 'prompt_template': dict({ + 'default': 'Hello, {name}!', + 'description': 'The prompt template for this chain', + 'title': 'Prompt Template', + 'type': 'string', + }), + }), + 'title': 'Configurable', + 'type': 'object', + }), + }), + 'properties': dict({ + 'configurable': dict({ + '$ref': '#/$defs/Configurable', + }), + }), + 'title': 'RunnableConfigurableFieldsConfig', + 'type': 'object', + }) +# --- +# name: test_configurable_fields[schema4] + dict({ + '$defs': dict({ + 'Configurable': dict({ + 'properties': dict({ + 'llm_responses': dict({ + 'default': list([ + 'a', + ]), + 'description': 'A list of fake responses for this LLM', + 'items': dict({ + 'type': 'string', + }), + 'title': 'LLM Responses', + 'type': 'array', + }), + 'prompt_template': dict({ + 'default': 'Hello, {name}!', + 'description': 'The prompt template for this chain', + 'title': 'Prompt Template', + 'type': 'string', + }), + }), + 'title': 'Configurable', + 'type': 'object', + }), + }), + 'properties': dict({ + 'configurable': dict({ + '$ref': '#/$defs/Configurable', + }), + }), + 'title': 'RunnableSequenceConfig', + 'type': 'object', + }) +# --- +# name: test_configurable_fields[schema5] + dict({ + '$defs': dict({ + 'Configurable': dict({ + 'properties': dict({ + 'llm_responses': dict({ + 'default': list([ + 'a', + ]), + 'description': 'A list of fake responses for this LLM', + 'items': dict({ + 'type': 'string', + }), + 'title': 'LLM Responses', + 'type': 'array', + }), + 'other_responses': dict({ + 'default': list([ + 'a', + ]), + 'items': dict({ + 'type': 'string', + }), + 'title': 'Other Responses', + 'type': 'array', + }), + 'prompt_template': dict({ + 'default': 'Hello, {name}!', + 'description': 'The prompt template for this chain', + 'title': 'Prompt Template', + 'type': 'string', + }), + }), + 'title': 'Configurable', + 'type': 'object', + }), + }), + 'properties': dict({ + 'configurable': dict({ + '$ref': '#/$defs/Configurable', + }), + }), + 'title': 'RunnableSequenceConfig', + 'type': 'object', + }) +# --- +# name: test_configurable_fields_example[schema7] + dict({ + '$defs': dict({ + 'Chat_Responses': dict({ + 'title': 'Chat Responses', + }), + 'Configurable': dict({ + 'properties': dict({ + 'chat_responses': dict({ + 'default': list([ + 'hello', + 'bye', + ]), + 'items': dict({ + '$ref': '#/$defs/Chat_Responses', + }), + 'title': 'Chat Responses', + 'type': 'array', + }), + 'llm': dict({ + '$ref': '#/$defs/LLM', + 'default': 'default', + }), + 'llm_responses': dict({ + 'default': list([ + 'a', + ]), + 'description': 'A list of fake responses for this LLM', + 'items': dict({ + 'type': 'string', + }), + 'title': 'LLM Responses', + 'type': 'array', + }), + 'prompt_template': dict({ + '$ref': '#/$defs/Prompt_Template', + 'default': 'hello', + 'description': 'The prompt template for this chain', + }), + }), + 'title': 'Configurable', + 'type': 'object', + }), + 'LLM': dict({ + 'title': 'LLM', + }), + 'Prompt_Template': dict({ + 'title': 'Prompt Template', + }), + }), + 'properties': dict({ + 'configurable': dict({ + '$ref': '#/$defs/Configurable', + }), + }), + 'title': 'RunnableSequenceConfig', + 'type': 'object', + }) +# --- +# name: test_configurable_fields_prefix_keys[schema6] + dict({ + 'definitions': dict({ + 'Chat_Responses': dict({ + 'title': 'Chat Responses', + }), + 'Configurable': dict({ + 'properties': dict({ + 'chat_sleep': dict({ + 'anyOf': list([ + dict({ + 'type': 'number', + 
}), + dict({ + 'type': 'null', + }), + ]), + 'default': None, + 'title': 'Chat Sleep', + }), + 'llm': dict({ + '$ref': '#/definitions/LLM', + 'default': 'default', + }), + 'llm==chat/responses': dict({ + 'default': list([ + 'hello', + 'bye', + ]), + 'items': dict({ + '$ref': '#/definitions/Chat_Responses', + }), + 'title': 'Chat Responses', + 'type': 'array', + }), + 'llm==default/responses': dict({ + 'default': list([ + 'a', + ]), + 'description': 'A list of fake responses for this LLM', + 'items': dict({ + 'type': 'string', + }), + 'title': 'LLM Responses', + 'type': 'array', + }), + 'prompt_template': dict({ + '$ref': '#/definitions/Prompt_Template', + 'default': 'hello', + 'description': 'The prompt template for this chain', + }), + }), + 'title': 'Configurable', + 'type': 'object', + }), + 'LLM': dict({ + 'title': 'LLM', + }), + 'Prompt_Template': dict({ + 'title': 'Prompt Template', + }), + }), + 'properties': dict({ + 'configurable': dict({ + '$ref': '#/definitions/Configurable', + }), + }), + 'title': 'RunnableSequenceConfig', + 'type': 'object', + }) +# --- # name: test_each ''' { @@ -1695,9 +1962,7 @@ 'additionalProperties': True, 'description': ''' Message from an AI. - AIMessage is returned from a chat model as a response to a prompt. - This message represents the output of the model and consists of both the raw output as returned by the model together standardized fields (e.g., tool calls, usage metadata) added by the LangChain framework. @@ -2078,10 +2343,8 @@ 'additionalProperties': True, 'description': ''' Message for passing the result of executing a tool back to a model. - FunctionMessage are an older version of the ToolMessage schema, and do not contain the tool_call_id field. - The tool_call_id field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. @@ -2211,15 +2474,10 @@ 'additionalProperties': True, 'description': ''' Message from a human. - HumanMessages are messages that are passed in from a human to the model. - Example: - .. code-block:: python - from langchain_core.messages import HumanMessage, SystemMessage - messages = [ SystemMessage( content="You are a helpful assistant! Your name is Bob." @@ -2228,7 +2486,6 @@ content="What is your name?" ) ] - # Instantiate a chat model and invoke it with the messages model = ... print(model.invoke(messages)) @@ -2381,21 +2638,15 @@ 'InputTokenDetails': dict({ 'description': ''' Breakdown of input token counts. - Does *not* need to sum to full input token count. Does *not* need to have all keys. - Example: - .. code-block:: python - { "audio": 10, "cache_creation": 200, "cache_read": 100, } - .. versionadded:: 0.3.9 - May also hold extra provider-specific keys. ''', 'properties': dict({ @@ -2418,7 +2669,6 @@ 'InvalidToolCall': dict({ 'description': ''' Allowance for errors made by LLM. - Here we add an ``error`` key to surface errors made during generation (e.g., invalid JSON arguments.) ''', @@ -2500,18 +2750,13 @@ 'OutputTokenDetails': dict({ 'description': ''' Breakdown of output token counts. - Does *not* need to sum to full output token count. Does *not* need to have all keys. - Example: - .. code-block:: python - { "audio": 10, "reasoning": 200, } - .. versionadded:: 0.3.9 ''', 'properties': dict({ @@ -2531,16 +2776,11 @@ 'additionalProperties': True, 'description': ''' Message for priming AI behavior. - The system message is usually passed in as the first of a sequence of input messages. 
- Example: - .. code-block:: python - from langchain_core.messages import HumanMessage, SystemMessage - messages = [ SystemMessage( content="You are a helpful assistant! Your name is Bob." @@ -2549,7 +2789,6 @@ content="What is your name?" ) ] - # Define a chat model and invoke it with the messages print(model.invoke(messages)) ''', @@ -2691,17 +2930,13 @@ 'ToolCall': dict({ 'description': ''' Represents a request to call a tool. - Example: - .. code-block:: python - { "name": "foo", "args": {"a": 1}, "id": "123" } - This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". ''', @@ -2741,18 +2976,13 @@ 'ToolCallChunk': dict({ 'description': ''' A chunk of a tool call (e.g., as part of a stream). - When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), all string attributes are concatenated. Chunks are only merged if their values of `index` are equal and not None. - Example: - .. code-block:: python - left_chunks = [ToolCallChunk(name="foo", args='{"a":', index=0)] right_chunks = [ToolCallChunk(name=None, args='1}', index=0)] - ( AIMessageChunk(content="", tool_call_chunks=left_chunks) + AIMessageChunk(content="", tool_call_chunks=right_chunks) @@ -2821,40 +3051,27 @@ 'additionalProperties': True, 'description': ''' Message for passing the result of executing a tool back to a model. - ToolMessages contain the result of a tool invocation. Typically, the result is encoded inside the `content` field. - Example: A ToolMessage representing a result of 42 from a tool call with id - .. code-block:: python - from langchain_core.messages import ToolMessage - ToolMessage(content='42', tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL') - - Example: A ToolMessage where only part of the tool output is sent to the model and the full output is passed in to artifact. - .. versionadded:: 0.2.17 - .. code-block:: python - from langchain_core.messages import ToolMessage - tool_output = { "stdout": "From the graph we can see that the correlation between x and y is ...", "stderr": None, "artifacts": {"type": "image", "base64_data": "/9j/4gIcSU..."}, } - ToolMessage( content=tool_output["stdout"], artifact=tool_output, tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL', ) - The tool_call_id field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. @@ -3021,13 +3238,9 @@ 'UsageMetadata': dict({ 'description': ''' Usage metadata for a message, such as token counts. - This is a standard representation of token usage that is consistent across models. - Example: - .. code-block:: python - { "input_tokens": 350, "output_tokens": 240, @@ -3042,9 +3255,7 @@ "reasoning": 200, } } - .. versionchanged:: 0.3.9 - Added ``input_token_details`` and ``output_token_details``. ''', 'properties': dict({ @@ -3136,9 +3347,7 @@ 'additionalProperties': True, 'description': ''' Message from an AI. - AIMessage is returned from a chat model as a response to a prompt. - This message represents the output of the model and consists of both the raw output as returned by the model together standardized fields (e.g., tool calls, usage metadata) added by the LangChain framework. @@ -3518,7 +3727,6 @@ 'ChatPromptValueConcrete': dict({ 'description': ''' Chat prompt value which explicitly lists out the message types it accepts. - For use in external schemas. 
''', 'properties': dict({ @@ -3582,10 +3790,8 @@ 'additionalProperties': True, 'description': ''' Message for passing the result of executing a tool back to a model. - FunctionMessage are an older version of the ToolMessage schema, and do not contain the tool_call_id field. - The tool_call_id field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. @@ -3715,15 +3921,10 @@ 'additionalProperties': True, 'description': ''' Message from a human. - HumanMessages are messages that are passed in from a human to the model. - Example: - .. code-block:: python - from langchain_core.messages import HumanMessage, SystemMessage - messages = [ SystemMessage( content="You are a helpful assistant! Your name is Bob." @@ -3732,7 +3933,6 @@ content="What is your name?" ) ] - # Instantiate a chat model and invoke it with the messages model = ... print(model.invoke(messages)) @@ -3885,21 +4085,15 @@ 'InputTokenDetails': dict({ 'description': ''' Breakdown of input token counts. - Does *not* need to sum to full input token count. Does *not* need to have all keys. - Example: - .. code-block:: python - { "audio": 10, "cache_creation": 200, "cache_read": 100, } - .. versionadded:: 0.3.9 - May also hold extra provider-specific keys. ''', 'properties': dict({ @@ -3922,7 +4116,6 @@ 'InvalidToolCall': dict({ 'description': ''' Allowance for errors made by LLM. - Here we add an ``error`` key to surface errors made during generation (e.g., invalid JSON arguments.) ''', @@ -4004,18 +4197,13 @@ 'OutputTokenDetails': dict({ 'description': ''' Breakdown of output token counts. - Does *not* need to sum to full output token count. Does *not* need to have all keys. - Example: - .. code-block:: python - { "audio": 10, "reasoning": 200, } - .. versionadded:: 0.3.9 ''', 'properties': dict({ @@ -4054,16 +4242,11 @@ 'additionalProperties': True, 'description': ''' Message for priming AI behavior. - The system message is usually passed in as the first of a sequence of input messages. - Example: - .. code-block:: python - from langchain_core.messages import HumanMessage, SystemMessage - messages = [ SystemMessage( content="You are a helpful assistant! Your name is Bob." @@ -4072,7 +4255,6 @@ content="What is your name?" ) ] - # Define a chat model and invoke it with the messages print(model.invoke(messages)) ''', @@ -4214,17 +4396,13 @@ 'ToolCall': dict({ 'description': ''' Represents a request to call a tool. - Example: - .. code-block:: python - { "name": "foo", "args": {"a": 1}, "id": "123" } - This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". ''', @@ -4264,18 +4442,13 @@ 'ToolCallChunk': dict({ 'description': ''' A chunk of a tool call (e.g., as part of a stream). - When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), all string attributes are concatenated. Chunks are only merged if their values of `index` are equal and not None. - Example: - .. code-block:: python - left_chunks = [ToolCallChunk(name="foo", args='{"a":', index=0)] right_chunks = [ToolCallChunk(name=None, args='1}', index=0)] - ( AIMessageChunk(content="", tool_call_chunks=left_chunks) + AIMessageChunk(content="", tool_call_chunks=right_chunks) @@ -4344,40 +4517,27 @@ 'additionalProperties': True, 'description': ''' Message for passing the result of executing a tool back to a model. - ToolMessages contain the result of a tool invocation. 
Typically, the result is encoded inside the `content` field. - Example: A ToolMessage representing a result of 42 from a tool call with id - .. code-block:: python - from langchain_core.messages import ToolMessage - ToolMessage(content='42', tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL') - - Example: A ToolMessage where only part of the tool output is sent to the model and the full output is passed in to artifact. - .. versionadded:: 0.2.17 - .. code-block:: python - from langchain_core.messages import ToolMessage - tool_output = { "stdout": "From the graph we can see that the correlation between x and y is ...", "stderr": None, "artifacts": {"type": "image", "base64_data": "/9j/4gIcSU..."}, } - ToolMessage( content=tool_output["stdout"], artifact=tool_output, tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL', ) - The tool_call_id field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. @@ -4544,13 +4704,9 @@ 'UsageMetadata': dict({ 'description': ''' Usage metadata for a message, such as token counts. - This is a standard representation of token usage that is consistent across models. - Example: - .. code-block:: python - { "input_tokens": 350, "output_tokens": 240, @@ -4565,9 +4721,7 @@ "reasoning": 200, } } - .. versionchanged:: 0.3.9 - Added ``input_token_details`` and ``output_token_details``. ''', 'properties': dict({ @@ -4671,9 +4825,7 @@ 'additionalProperties': True, 'description': ''' Message from an AI. - AIMessage is returned from a chat model as a response to a prompt. - This message represents the output of the model and consists of both the raw output as returned by the model together standardized fields (e.g., tool calls, usage metadata) added by the LangChain framework. @@ -5053,7 +5205,6 @@ 'ChatPromptValueConcrete': dict({ 'description': ''' Chat prompt value which explicitly lists out the message types it accepts. - For use in external schemas. ''', 'properties': dict({ @@ -5117,10 +5268,8 @@ 'additionalProperties': True, 'description': ''' Message for passing the result of executing a tool back to a model. - FunctionMessage are an older version of the ToolMessage schema, and do not contain the tool_call_id field. - The tool_call_id field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. @@ -5250,15 +5399,10 @@ 'additionalProperties': True, 'description': ''' Message from a human. - HumanMessages are messages that are passed in from a human to the model. - Example: - .. code-block:: python - from langchain_core.messages import HumanMessage, SystemMessage - messages = [ SystemMessage( content="You are a helpful assistant! Your name is Bob." @@ -5267,7 +5411,6 @@ content="What is your name?" ) ] - # Instantiate a chat model and invoke it with the messages model = ... print(model.invoke(messages)) @@ -5420,21 +5563,15 @@ 'InputTokenDetails': dict({ 'description': ''' Breakdown of input token counts. - Does *not* need to sum to full input token count. Does *not* need to have all keys. - Example: - .. code-block:: python - { "audio": 10, "cache_creation": 200, "cache_read": 100, } - .. versionadded:: 0.3.9 - May also hold extra provider-specific keys. ''', 'properties': dict({ @@ -5457,7 +5594,6 @@ 'InvalidToolCall': dict({ 'description': ''' Allowance for errors made by LLM. 
- Here we add an ``error`` key to surface errors made during generation (e.g., invalid JSON arguments.) ''', @@ -5539,18 +5675,13 @@ 'OutputTokenDetails': dict({ 'description': ''' Breakdown of output token counts. - Does *not* need to sum to full output token count. Does *not* need to have all keys. - Example: - .. code-block:: python - { "audio": 10, "reasoning": 200, } - .. versionadded:: 0.3.9 ''', 'properties': dict({ @@ -5589,16 +5720,11 @@ 'additionalProperties': True, 'description': ''' Message for priming AI behavior. - The system message is usually passed in as the first of a sequence of input messages. - Example: - .. code-block:: python - from langchain_core.messages import HumanMessage, SystemMessage - messages = [ SystemMessage( content="You are a helpful assistant! Your name is Bob." @@ -5607,7 +5733,6 @@ content="What is your name?" ) ] - # Define a chat model and invoke it with the messages print(model.invoke(messages)) ''', @@ -5749,17 +5874,13 @@ 'ToolCall': dict({ 'description': ''' Represents a request to call a tool. - Example: - .. code-block:: python - { "name": "foo", "args": {"a": 1}, "id": "123" } - This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". ''', @@ -5799,18 +5920,13 @@ 'ToolCallChunk': dict({ 'description': ''' A chunk of a tool call (e.g., as part of a stream). - When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), all string attributes are concatenated. Chunks are only merged if their values of `index` are equal and not None. - Example: - .. code-block:: python - left_chunks = [ToolCallChunk(name="foo", args='{"a":', index=0)] right_chunks = [ToolCallChunk(name=None, args='1}', index=0)] - ( AIMessageChunk(content="", tool_call_chunks=left_chunks) + AIMessageChunk(content="", tool_call_chunks=right_chunks) @@ -5879,40 +5995,27 @@ 'additionalProperties': True, 'description': ''' Message for passing the result of executing a tool back to a model. - ToolMessages contain the result of a tool invocation. Typically, the result is encoded inside the `content` field. - Example: A ToolMessage representing a result of 42 from a tool call with id - .. code-block:: python - from langchain_core.messages import ToolMessage - ToolMessage(content='42', tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL') - - Example: A ToolMessage where only part of the tool output is sent to the model and the full output is passed in to artifact. - .. versionadded:: 0.2.17 - .. code-block:: python - from langchain_core.messages import ToolMessage - tool_output = { "stdout": "From the graph we can see that the correlation between x and y is ...", "stderr": None, "artifacts": {"type": "image", "base64_data": "/9j/4gIcSU..."}, } - ToolMessage( content=tool_output["stdout"], artifact=tool_output, tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL', ) - The tool_call_id field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. @@ -6079,13 +6182,9 @@ 'UsageMetadata': dict({ 'description': ''' Usage metadata for a message, such as token counts. - This is a standard representation of token usage that is consistent across models. - Example: - .. code-block:: python - { "input_tokens": 350, "output_tokens": 240, @@ -6100,9 +6199,7 @@ "reasoning": 200, } } - .. versionchanged:: 0.3.9 - Added ``input_token_details`` and ``output_token_details``. 
''', 'properties': dict({ @@ -6144,9 +6241,7 @@ 'additionalProperties': True, 'description': ''' Message from an AI. - AIMessage is returned from a chat model as a response to a prompt. - This message represents the output of the model and consists of both the raw output as returned by the model together standardized fields (e.g., tool calls, usage metadata) added by the LangChain framework. @@ -6527,10 +6622,8 @@ 'additionalProperties': True, 'description': ''' Message for passing the result of executing a tool back to a model. - FunctionMessage are an older version of the ToolMessage schema, and do not contain the tool_call_id field. - The tool_call_id field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. @@ -6660,15 +6753,10 @@ 'additionalProperties': True, 'description': ''' Message from a human. - HumanMessages are messages that are passed in from a human to the model. - Example: - .. code-block:: python - from langchain_core.messages import HumanMessage, SystemMessage - messages = [ SystemMessage( content="You are a helpful assistant! Your name is Bob." @@ -6677,7 +6765,6 @@ content="What is your name?" ) ] - # Instantiate a chat model and invoke it with the messages model = ... print(model.invoke(messages)) @@ -6830,21 +6917,15 @@ 'InputTokenDetails': dict({ 'description': ''' Breakdown of input token counts. - Does *not* need to sum to full input token count. Does *not* need to have all keys. - Example: - .. code-block:: python - { "audio": 10, "cache_creation": 200, "cache_read": 100, } - .. versionadded:: 0.3.9 - May also hold extra provider-specific keys. ''', 'properties': dict({ @@ -6867,7 +6948,6 @@ 'InvalidToolCall': dict({ 'description': ''' Allowance for errors made by LLM. - Here we add an ``error`` key to surface errors made during generation (e.g., invalid JSON arguments.) ''', @@ -6949,18 +7029,13 @@ 'OutputTokenDetails': dict({ 'description': ''' Breakdown of output token counts. - Does *not* need to sum to full output token count. Does *not* need to have all keys. - Example: - .. code-block:: python - { "audio": 10, "reasoning": 200, } - .. versionadded:: 0.3.9 ''', 'properties': dict({ @@ -6980,16 +7055,11 @@ 'additionalProperties': True, 'description': ''' Message for priming AI behavior. - The system message is usually passed in as the first of a sequence of input messages. - Example: - .. code-block:: python - from langchain_core.messages import HumanMessage, SystemMessage - messages = [ SystemMessage( content="You are a helpful assistant! Your name is Bob." @@ -6998,7 +7068,6 @@ content="What is your name?" ) ] - # Define a chat model and invoke it with the messages print(model.invoke(messages)) ''', @@ -7140,17 +7209,13 @@ 'ToolCall': dict({ 'description': ''' Represents a request to call a tool. - Example: - .. code-block:: python - { "name": "foo", "args": {"a": 1}, "id": "123" } - This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". ''', @@ -7190,18 +7255,13 @@ 'ToolCallChunk': dict({ 'description': ''' A chunk of a tool call (e.g., as part of a stream). - When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), all string attributes are concatenated. Chunks are only merged if their values of `index` are equal and not None. - Example: - .. 
code-block:: python - left_chunks = [ToolCallChunk(name="foo", args='{"a":', index=0)] right_chunks = [ToolCallChunk(name=None, args='1}', index=0)] - ( AIMessageChunk(content="", tool_call_chunks=left_chunks) + AIMessageChunk(content="", tool_call_chunks=right_chunks) @@ -7270,40 +7330,27 @@ 'additionalProperties': True, 'description': ''' Message for passing the result of executing a tool back to a model. - ToolMessages contain the result of a tool invocation. Typically, the result is encoded inside the `content` field. - Example: A ToolMessage representing a result of 42 from a tool call with id - .. code-block:: python - from langchain_core.messages import ToolMessage - ToolMessage(content='42', tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL') - - Example: A ToolMessage where only part of the tool output is sent to the model and the full output is passed in to artifact. - .. versionadded:: 0.2.17 - .. code-block:: python - from langchain_core.messages import ToolMessage - tool_output = { "stdout": "From the graph we can see that the correlation between x and y is ...", "stderr": None, "artifacts": {"type": "image", "base64_data": "/9j/4gIcSU..."}, } - ToolMessage( content=tool_output["stdout"], artifact=tool_output, tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL', ) - The tool_call_id field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. @@ -7470,13 +7517,9 @@ 'UsageMetadata': dict({ 'description': ''' Usage metadata for a message, such as token counts. - This is a standard representation of token usage that is consistent across models. - Example: - .. code-block:: python - { "input_tokens": 350, "output_tokens": 240, @@ -7491,9 +7534,7 @@ "reasoning": 200, } } - .. versionchanged:: 0.3.9 - Added ``input_token_details`` and ``output_token_details``. ''', 'properties': dict({ @@ -7627,9 +7668,7 @@ 'additionalProperties': True, 'description': ''' Message from an AI. - AIMessage is returned from a chat model as a response to a prompt. - This message represents the output of the model and consists of both the raw output as returned by the model together standardized fields (e.g., tool calls, usage metadata) added by the LangChain framework. @@ -8009,7 +8048,6 @@ 'ChatPromptValueConcrete': dict({ 'description': ''' Chat prompt value which explicitly lists out the message types it accepts. - For use in external schemas. ''', 'properties': dict({ @@ -8073,10 +8111,8 @@ 'additionalProperties': True, 'description': ''' Message for passing the result of executing a tool back to a model. - FunctionMessage are an older version of the ToolMessage schema, and do not contain the tool_call_id field. - The tool_call_id field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. @@ -8206,15 +8242,10 @@ 'additionalProperties': True, 'description': ''' Message from a human. - HumanMessages are messages that are passed in from a human to the model. - Example: - .. code-block:: python - from langchain_core.messages import HumanMessage, SystemMessage - messages = [ SystemMessage( content="You are a helpful assistant! Your name is Bob." @@ -8223,7 +8254,6 @@ content="What is your name?" ) ] - # Instantiate a chat model and invoke it with the messages model = ... 
print(model.invoke(messages)) @@ -8376,21 +8406,15 @@ 'InputTokenDetails': dict({ 'description': ''' Breakdown of input token counts. - Does *not* need to sum to full input token count. Does *not* need to have all keys. - Example: - .. code-block:: python - { "audio": 10, "cache_creation": 200, "cache_read": 100, } - .. versionadded:: 0.3.9 - May also hold extra provider-specific keys. ''', 'properties': dict({ @@ -8413,7 +8437,6 @@ 'InvalidToolCall': dict({ 'description': ''' Allowance for errors made by LLM. - Here we add an ``error`` key to surface errors made during generation (e.g., invalid JSON arguments.) ''', @@ -8495,18 +8518,13 @@ 'OutputTokenDetails': dict({ 'description': ''' Breakdown of output token counts. - Does *not* need to sum to full output token count. Does *not* need to have all keys. - Example: - .. code-block:: python - { "audio": 10, "reasoning": 200, } - .. versionadded:: 0.3.9 ''', 'properties': dict({ @@ -8545,16 +8563,11 @@ 'additionalProperties': True, 'description': ''' Message for priming AI behavior. - The system message is usually passed in as the first of a sequence of input messages. - Example: - .. code-block:: python - from langchain_core.messages import HumanMessage, SystemMessage - messages = [ SystemMessage( content="You are a helpful assistant! Your name is Bob." @@ -8563,7 +8576,6 @@ content="What is your name?" ) ] - # Define a chat model and invoke it with the messages print(model.invoke(messages)) ''', @@ -8705,17 +8717,13 @@ 'ToolCall': dict({ 'description': ''' Represents a request to call a tool. - Example: - .. code-block:: python - { "name": "foo", "args": {"a": 1}, "id": "123" } - This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". ''', @@ -8755,18 +8763,13 @@ 'ToolCallChunk': dict({ 'description': ''' A chunk of a tool call (e.g., as part of a stream). - When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), all string attributes are concatenated. Chunks are only merged if their values of `index` are equal and not None. - Example: - .. code-block:: python - left_chunks = [ToolCallChunk(name="foo", args='{"a":', index=0)] right_chunks = [ToolCallChunk(name=None, args='1}', index=0)] - ( AIMessageChunk(content="", tool_call_chunks=left_chunks) + AIMessageChunk(content="", tool_call_chunks=right_chunks) @@ -8835,40 +8838,27 @@ 'additionalProperties': True, 'description': ''' Message for passing the result of executing a tool back to a model. - ToolMessages contain the result of a tool invocation. Typically, the result is encoded inside the `content` field. - Example: A ToolMessage representing a result of 42 from a tool call with id - .. code-block:: python - from langchain_core.messages import ToolMessage - ToolMessage(content='42', tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL') - - Example: A ToolMessage where only part of the tool output is sent to the model and the full output is passed in to artifact. - .. versionadded:: 0.2.17 - .. code-block:: python - from langchain_core.messages import ToolMessage - tool_output = { "stdout": "From the graph we can see that the correlation between x and y is ...", "stderr": None, "artifacts": {"type": "image", "base64_data": "/9j/4gIcSU..."}, } - ToolMessage( content=tool_output["stdout"], artifact=tool_output, tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL', ) - The tool_call_id field is used to associate the tool call request with the tool call response. 
This is useful in situations where a chat model is able to request multiple tool calls in parallel. @@ -9035,13 +9025,9 @@ 'UsageMetadata': dict({ 'description': ''' Usage metadata for a message, such as token counts. - This is a standard representation of token usage that is consistent across models. - Example: - .. code-block:: python - { "input_tokens": 350, "output_tokens": 240, @@ -9056,9 +9042,7 @@ "reasoning": 200, } } - .. versionchanged:: 0.3.9 - Added ``input_token_details`` and ``output_token_details``. ''', 'properties': dict({ @@ -9145,9 +9129,7 @@ 'additionalProperties': True, 'description': ''' Message from an AI. - AIMessage is returned from a chat model as a response to a prompt. - This message represents the output of the model and consists of both the raw output as returned by the model together standardized fields (e.g., tool calls, usage metadata) added by the LangChain framework. @@ -9528,10 +9510,8 @@ 'additionalProperties': True, 'description': ''' Message for passing the result of executing a tool back to a model. - FunctionMessage are an older version of the ToolMessage schema, and do not contain the tool_call_id field. - The tool_call_id field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. @@ -9661,15 +9641,10 @@ 'additionalProperties': True, 'description': ''' Message from a human. - HumanMessages are messages that are passed in from a human to the model. - Example: - .. code-block:: python - from langchain_core.messages import HumanMessage, SystemMessage - messages = [ SystemMessage( content="You are a helpful assistant! Your name is Bob." @@ -9678,7 +9653,6 @@ content="What is your name?" ) ] - # Instantiate a chat model and invoke it with the messages model = ... print(model.invoke(messages)) @@ -9831,21 +9805,15 @@ 'InputTokenDetails': dict({ 'description': ''' Breakdown of input token counts. - Does *not* need to sum to full input token count. Does *not* need to have all keys. - Example: - .. code-block:: python - { "audio": 10, "cache_creation": 200, "cache_read": 100, } - .. versionadded:: 0.3.9 - May also hold extra provider-specific keys. ''', 'properties': dict({ @@ -9868,7 +9836,6 @@ 'InvalidToolCall': dict({ 'description': ''' Allowance for errors made by LLM. - Here we add an ``error`` key to surface errors made during generation (e.g., invalid JSON arguments.) ''', @@ -9950,18 +9917,13 @@ 'OutputTokenDetails': dict({ 'description': ''' Breakdown of output token counts. - Does *not* need to sum to full output token count. Does *not* need to have all keys. - Example: - .. code-block:: python - { "audio": 10, "reasoning": 200, } - .. versionadded:: 0.3.9 ''', 'properties': dict({ @@ -9981,16 +9943,11 @@ 'additionalProperties': True, 'description': ''' Message for priming AI behavior. - The system message is usually passed in as the first of a sequence of input messages. - Example: - .. code-block:: python - from langchain_core.messages import HumanMessage, SystemMessage - messages = [ SystemMessage( content="You are a helpful assistant! Your name is Bob." @@ -9999,7 +9956,6 @@ content="What is your name?" ) ] - # Define a chat model and invoke it with the messages print(model.invoke(messages)) ''', @@ -10141,17 +10097,13 @@ 'ToolCall': dict({ 'description': ''' Represents a request to call a tool. - Example: - .. 
code-block:: python - { "name": "foo", "args": {"a": 1}, "id": "123" } - This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". ''', @@ -10191,18 +10143,13 @@ 'ToolCallChunk': dict({ 'description': ''' A chunk of a tool call (e.g., as part of a stream). - When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), all string attributes are concatenated. Chunks are only merged if their values of `index` are equal and not None. - Example: - .. code-block:: python - left_chunks = [ToolCallChunk(name="foo", args='{"a":', index=0)] right_chunks = [ToolCallChunk(name=None, args='1}', index=0)] - ( AIMessageChunk(content="", tool_call_chunks=left_chunks) + AIMessageChunk(content="", tool_call_chunks=right_chunks) @@ -10271,40 +10218,27 @@ 'additionalProperties': True, 'description': ''' Message for passing the result of executing a tool back to a model. - ToolMessages contain the result of a tool invocation. Typically, the result is encoded inside the `content` field. - Example: A ToolMessage representing a result of 42 from a tool call with id - .. code-block:: python - from langchain_core.messages import ToolMessage - ToolMessage(content='42', tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL') - - Example: A ToolMessage where only part of the tool output is sent to the model and the full output is passed in to artifact. - .. versionadded:: 0.2.17 - .. code-block:: python - from langchain_core.messages import ToolMessage - tool_output = { "stdout": "From the graph we can see that the correlation between x and y is ...", "stderr": None, "artifacts": {"type": "image", "base64_data": "/9j/4gIcSU..."}, } - ToolMessage( content=tool_output["stdout"], artifact=tool_output, tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL', ) - The tool_call_id field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. @@ -10471,13 +10405,9 @@ 'UsageMetadata': dict({ 'description': ''' Usage metadata for a message, such as token counts. - This is a standard representation of token usage that is consistent across models. - Example: - .. code-block:: python - { "input_tokens": 350, "output_tokens": 240, @@ -10492,9 +10422,7 @@ "reasoning": 200, } } - .. versionchanged:: 0.3.9 - Added ``input_token_details`` and ``output_token_details``. ''', 'properties': dict({ @@ -10536,9 +10464,7 @@ 'additionalProperties': True, 'description': ''' Message from an AI. - AIMessage is returned from a chat model as a response to a prompt. - This message represents the output of the model and consists of both the raw output as returned by the model together standardized fields (e.g., tool calls, usage metadata) added by the LangChain framework. @@ -10918,7 +10844,6 @@ 'ChatPromptValueConcrete': dict({ 'description': ''' Chat prompt value which explicitly lists out the message types it accepts. - For use in external schemas. ''', 'properties': dict({ @@ -10982,10 +10907,8 @@ 'additionalProperties': True, 'description': ''' Message for passing the result of executing a tool back to a model. - FunctionMessage are an older version of the ToolMessage schema, and do not contain the tool_call_id field. - The tool_call_id field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. 
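The ToolCallChunk merge semantics described above can be exercised directly. A minimal sketch, assuming the public langchain_core.messages API (exact reprs vary by version; `id=None` is passed explicitly so the TypedDict validates):

.. code-block:: python

    from langchain_core.messages import AIMessageChunk, ToolCallChunk

    left = AIMessageChunk(
        content="",
        tool_call_chunks=[ToolCallChunk(name="foo", args='{"a":', id=None, index=0)],
    )
    right = AIMessageChunk(
        content="",
        tool_call_chunks=[ToolCallChunk(name=None, args='1}', id=None, index=0)],
    )
    merged = left + right
    # Chunks whose index values match (and are not None) are merged; string
    # attributes such as args are concatenated.
    assert merged.tool_call_chunks[0]["args"] == '{"a":1}'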
@@ -11115,15 +11038,10 @@ 'additionalProperties': True, 'description': ''' Message from a human. - HumanMessages are messages that are passed in from a human to the model. - Example: - .. code-block:: python - from langchain_core.messages import HumanMessage, SystemMessage - messages = [ SystemMessage( content="You are a helpful assistant! Your name is Bob." @@ -11132,7 +11050,6 @@ content="What is your name?" ) ] - # Instantiate a chat model and invoke it with the messages model = ... print(model.invoke(messages)) @@ -11285,21 +11202,15 @@ 'InputTokenDetails': dict({ 'description': ''' Breakdown of input token counts. - Does *not* need to sum to full input token count. Does *not* need to have all keys. - Example: - .. code-block:: python - { "audio": 10, "cache_creation": 200, "cache_read": 100, } - .. versionadded:: 0.3.9 - May also hold extra provider-specific keys. ''', 'properties': dict({ @@ -11322,7 +11233,6 @@ 'InvalidToolCall': dict({ 'description': ''' Allowance for errors made by LLM. - Here we add an ``error`` key to surface errors made during generation (e.g., invalid JSON arguments.) ''', @@ -11404,18 +11314,13 @@ 'OutputTokenDetails': dict({ 'description': ''' Breakdown of output token counts. - Does *not* need to sum to full output token count. Does *not* need to have all keys. - Example: - .. code-block:: python - { "audio": 10, "reasoning": 200, } - .. versionadded:: 0.3.9 ''', 'properties': dict({ @@ -11465,16 +11370,11 @@ 'additionalProperties': True, 'description': ''' Message for priming AI behavior. - The system message is usually passed in as the first of a sequence of input messages. - Example: - .. code-block:: python - from langchain_core.messages import HumanMessage, SystemMessage - messages = [ SystemMessage( content="You are a helpful assistant! Your name is Bob." @@ -11483,7 +11383,6 @@ content="What is your name?" ) ] - # Define a chat model and invoke it with the messages print(model.invoke(messages)) ''', @@ -11625,17 +11524,13 @@ 'ToolCall': dict({ 'description': ''' Represents a request to call a tool. - Example: - .. code-block:: python - { "name": "foo", "args": {"a": 1}, "id": "123" } - This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". ''', @@ -11675,18 +11570,13 @@ 'ToolCallChunk': dict({ 'description': ''' A chunk of a tool call (e.g., as part of a stream). - When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), all string attributes are concatenated. Chunks are only merged if their values of `index` are equal and not None. - Example: - .. code-block:: python - left_chunks = [ToolCallChunk(name="foo", args='{"a":', index=0)] right_chunks = [ToolCallChunk(name=None, args='1}', index=0)] - ( AIMessageChunk(content="", tool_call_chunks=left_chunks) + AIMessageChunk(content="", tool_call_chunks=right_chunks) @@ -11755,40 +11645,27 @@ 'additionalProperties': True, 'description': ''' Message for passing the result of executing a tool back to a model. - ToolMessages contain the result of a tool invocation. Typically, the result is encoded inside the `content` field. - Example: A ToolMessage representing a result of 42 from a tool call with id - .. code-block:: python - from langchain_core.messages import ToolMessage - ToolMessage(content='42', tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL') - - Example: A ToolMessage where only part of the tool output is sent to the model and the full output is passed in to artifact. - .. versionadded:: 0.2.17 - .. 
code-block:: python - from langchain_core.messages import ToolMessage - tool_output = { "stdout": "From the graph we can see that the correlation between x and y is ...", "stderr": None, "artifacts": {"type": "image", "base64_data": "/9j/4gIcSU..."}, } - ToolMessage( content=tool_output["stdout"], artifact=tool_output, tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL', ) - The tool_call_id field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. @@ -11955,13 +11832,9 @@ 'UsageMetadata': dict({ 'description': ''' Usage metadata for a message, such as token counts. - This is a standard representation of token usage that is consistent across models. - Example: - .. code-block:: python - { "input_tokens": 350, "output_tokens": 240, @@ -11976,9 +11849,7 @@ "reasoning": 200, } } - .. versionchanged:: 0.3.9 - Added ``input_token_details`` and ``output_token_details``. ''', 'properties': dict({ @@ -12032,9 +11903,7 @@ 'additionalProperties': True, 'description': ''' Message from an AI. - AIMessage is returned from a chat model as a response to a prompt. - This message represents the output of the model and consists of both the raw output as returned by the model together standardized fields (e.g., tool calls, usage metadata) added by the LangChain framework. @@ -12414,7 +12283,6 @@ 'ChatPromptValueConcrete': dict({ 'description': ''' Chat prompt value which explicitly lists out the message types it accepts. - For use in external schemas. ''', 'properties': dict({ @@ -12478,10 +12346,8 @@ 'additionalProperties': True, 'description': ''' Message for passing the result of executing a tool back to a model. - FunctionMessage are an older version of the ToolMessage schema, and do not contain the tool_call_id field. - The tool_call_id field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. @@ -12611,15 +12477,10 @@ 'additionalProperties': True, 'description': ''' Message from a human. - HumanMessages are messages that are passed in from a human to the model. - Example: - .. code-block:: python - from langchain_core.messages import HumanMessage, SystemMessage - messages = [ SystemMessage( content="You are a helpful assistant! Your name is Bob." @@ -12628,7 +12489,6 @@ content="What is your name?" ) ] - # Instantiate a chat model and invoke it with the messages model = ... print(model.invoke(messages)) @@ -12781,21 +12641,15 @@ 'InputTokenDetails': dict({ 'description': ''' Breakdown of input token counts. - Does *not* need to sum to full input token count. Does *not* need to have all keys. - Example: - .. code-block:: python - { "audio": 10, "cache_creation": 200, "cache_read": 100, } - .. versionadded:: 0.3.9 - May also hold extra provider-specific keys. ''', 'properties': dict({ @@ -12818,7 +12672,6 @@ 'InvalidToolCall': dict({ 'description': ''' Allowance for errors made by LLM. - Here we add an ``error`` key to surface errors made during generation (e.g., invalid JSON arguments.) ''', @@ -12900,18 +12753,13 @@ 'OutputTokenDetails': dict({ 'description': ''' Breakdown of output token counts. - Does *not* need to sum to full output token count. Does *not* need to have all keys. - Example: - .. code-block:: python - { "audio": 10, "reasoning": 200, } - .. 
versionadded:: 0.3.9 ''', 'properties': dict({ @@ -12950,16 +12798,11 @@ 'additionalProperties': True, 'description': ''' Message for priming AI behavior. - The system message is usually passed in as the first of a sequence of input messages. - Example: - .. code-block:: python - from langchain_core.messages import HumanMessage, SystemMessage - messages = [ SystemMessage( content="You are a helpful assistant! Your name is Bob." @@ -12968,7 +12811,6 @@ content="What is your name?" ) ] - # Define a chat model and invoke it with the messages print(model.invoke(messages)) ''', @@ -13110,17 +12952,13 @@ 'ToolCall': dict({ 'description': ''' Represents a request to call a tool. - Example: - .. code-block:: python - { "name": "foo", "args": {"a": 1}, "id": "123" } - This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". ''', @@ -13160,18 +12998,13 @@ 'ToolCallChunk': dict({ 'description': ''' A chunk of a tool call (e.g., as part of a stream). - When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), all string attributes are concatenated. Chunks are only merged if their values of `index` are equal and not None. - Example: - .. code-block:: python - left_chunks = [ToolCallChunk(name="foo", args='{"a":', index=0)] right_chunks = [ToolCallChunk(name=None, args='1}', index=0)] - ( AIMessageChunk(content="", tool_call_chunks=left_chunks) + AIMessageChunk(content="", tool_call_chunks=right_chunks) @@ -13240,40 +13073,27 @@ 'additionalProperties': True, 'description': ''' Message for passing the result of executing a tool back to a model. - ToolMessages contain the result of a tool invocation. Typically, the result is encoded inside the `content` field. - Example: A ToolMessage representing a result of 42 from a tool call with id - .. code-block:: python - from langchain_core.messages import ToolMessage - ToolMessage(content='42', tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL') - - Example: A ToolMessage where only part of the tool output is sent to the model and the full output is passed in to artifact. - .. versionadded:: 0.2.17 - .. code-block:: python - from langchain_core.messages import ToolMessage - tool_output = { "stdout": "From the graph we can see that the correlation between x and y is ...", "stderr": None, "artifacts": {"type": "image", "base64_data": "/9j/4gIcSU..."}, } - ToolMessage( content=tool_output["stdout"], artifact=tool_output, tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL', ) - The tool_call_id field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. @@ -13440,13 +13260,9 @@ 'UsageMetadata': dict({ 'description': ''' Usage metadata for a message, such as token counts. - This is a standard representation of token usage that is consistent across models. - Example: - .. code-block:: python - { "input_tokens": 350, "output_tokens": 240, @@ -13461,9 +13277,7 @@ "reasoning": 200, } } - .. versionchanged:: 0.3.9 - Added ``input_token_details`` and ``output_token_details``. ''', 'properties': dict({ From 5222d51fb0bad370a7c5923fc3b24af32d497798 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Tue, 26 Aug 2025 14:22:11 -0400 Subject: [PATCH 57/73] . 
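Stop stamping the default output version onto every AI message: ``"output_version"`` is now written into ``additional_kwargs`` only when an explicit, non-``"v0"`` version is in effect, and the test stubs and expectations are updated to match. A rough sketch of the gating applied throughout ``stream``/``astream`` and the cached-generate paths (the helper name is invented for illustration; ``chunk_position`` is the chunk field used elsewhere in this branch):

.. code-block:: python

    from langchain_core.messages import AIMessage, AIMessageChunk

    def _maybe_tag_output_version(message, output_version):
        # Hypothetical helper mirroring the inlined checks in this diff:
        # skip synthetic "last" chunks with empty content, and skip the
        # default "v0" version entirely.
        if (
            isinstance(message, (AIMessage, AIMessageChunk))
            and (
                not isinstance(message, AIMessageChunk)
                or message.chunk_position != "last"
                or message.content
            )
            and output_version
            and output_version != "v0"
        ):
            message.additional_kwargs["output_version"] = output_version
        return message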
--- .../language_models/chat_models.py | 86 +++++++++++-------- .../unit_tests/fake/test_fake_chat_model.py | 74 +++++----------- .../language_models/chat_models/test_base.py | 26 ++---- .../unit_tests/runnables/test_runnable.py | 4 +- .../runnables/test_runnable_events_v1.py | 6 +- .../runnables/test_runnable_events_v2.py | 6 +- libs/core/tests/unit_tests/stubs.py | 36 +------- 7 files changed, 91 insertions(+), 147 deletions(-) diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py index 06c68c340fab1..9f7997cef9eb4 100644 --- a/libs/core/langchain_core/language_models/chat_models.py +++ b/libs/core/langchain_core/language_models/chat_models.py @@ -632,13 +632,18 @@ def stream( chunk.message.id = run_id chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk) output_version = kwargs["_output_version"] - if isinstance(chunk.message, (AIMessage, AIMessageChunk)) and ( - not isinstance(chunk.message, AIMessageChunk) - or chunk.message.chunk_position != "last" - or chunk.message.content # Include last chunks with content + if ( + isinstance(chunk.message, (AIMessage, AIMessageChunk)) + and ( + not isinstance(chunk.message, AIMessageChunk) + or chunk.message.chunk_position != "last" + or chunk.message.content # Include last chunks with content + ) + and output_version + and output_version != "v0" ): chunk.message.additional_kwargs["output_version"] = ( - output_version or "v0" + output_version ) if output_version == "v1": # Overwrite .content with .content_blocks @@ -785,14 +790,17 @@ async def astream( chunk.message.id = run_id chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk) output_version = kwargs["_output_version"] - if isinstance(chunk.message, (AIMessage, AIMessageChunk)) and ( - not isinstance(chunk.message, AIMessageChunk) - or chunk.message.chunk_position != "last" - or chunk.message.content # Include last chunks with content - ): - chunk.message.additional_kwargs["output_version"] = ( - output_version or "v0" + if ( + isinstance(chunk.message, (AIMessage, AIMessageChunk)) + and ( + not isinstance(chunk.message, AIMessageChunk) + or chunk.message.chunk_position != "last" + or chunk.message.content # Include last chunks with content ) + and output_version + and output_version != "v0" + ): + chunk.message.additional_kwargs["output_version"] = output_version if output_version == "v1": # Overwrite .content with .content_blocks chunk.message = _update_message_content_to_blocks( @@ -1278,14 +1286,17 @@ def _generate_with_cache( messages, stop=stop, output_version=output_version or "v0", **kwargs ): chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk) - if isinstance(chunk.message, (AIMessage, AIMessageChunk)) and ( - not isinstance(chunk.message, AIMessageChunk) - or chunk.message.chunk_position != "last" - or chunk.message.content # Include last chunks with content - ): - chunk.message.additional_kwargs["output_version"] = ( - output_version or "v0" + if ( + isinstance(chunk.message, (AIMessage, AIMessageChunk)) + and ( + not isinstance(chunk.message, AIMessageChunk) + or chunk.message.chunk_position != "last" + or chunk.message.content # Include last chunks with content ) + and output_version + and output_version != "v0" + ): + chunk.message.additional_kwargs["output_version"] = output_version if run_manager: if chunk.message.id is None: chunk.message.id = run_id @@ -1344,10 +1355,12 @@ def _generate_with_cache( generation.message.response_metadata = 
_gen_info_and_msg_metadata( generation ) - if isinstance(generation.message, (AIMessage, AIMessageChunk)): - generation.message.additional_kwargs["output_version"] = ( - output_version or "v0" - ) + if ( + isinstance(generation.message, (AIMessage, AIMessageChunk)) + and output_version + and output_version != "v0" + ): + generation.message.additional_kwargs["output_version"] = output_version if len(result.generations) == 1 and result.llm_output is not None: result.generations[0].message.response_metadata = { **result.llm_output, @@ -1407,14 +1420,17 @@ async def _agenerate_with_cache( messages, stop=stop, output_version=output_version or "v0", **kwargs ): chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk) - if isinstance(chunk.message, (AIMessage, AIMessageChunk)) and ( - not isinstance(chunk.message, AIMessageChunk) - or chunk.message.chunk_position != "last" - or chunk.message.content # Include last chunks with content - ): - chunk.message.additional_kwargs["output_version"] = ( - output_version or "v0" + if ( + isinstance(chunk.message, (AIMessage, AIMessageChunk)) + and ( + not isinstance(chunk.message, AIMessageChunk) + or chunk.message.chunk_position != "last" + or chunk.message.content # Include last chunks with content ) + and output_version + and output_version != "v0" + ): + chunk.message.additional_kwargs["output_version"] = output_version if run_manager: if chunk.message.id is None: chunk.message.id = run_id @@ -1473,10 +1489,12 @@ async def _agenerate_with_cache( generation.message.response_metadata = _gen_info_and_msg_metadata( generation ) - if isinstance(generation.message, (AIMessage, AIMessageChunk)): - generation.message.additional_kwargs["output_version"] = ( - output_version or "v0" - ) + if ( + isinstance(generation.message, (AIMessage, AIMessageChunk)) + and output_version + and output_version != "v0" + ): + generation.message.additional_kwargs["output_version"] = output_version if len(result.generations) == 1 and result.llm_output is not None: result.generations[0].message.response_metadata = { **result.llm_output, diff --git a/libs/core/tests/unit_tests/fake/test_fake_chat_model.py b/libs/core/tests/unit_tests/fake/test_fake_chat_model.py index ad4d7e3a00a40..6d43332904673 100644 --- a/libs/core/tests/unit_tests/fake/test_fake_chat_model.py +++ b/libs/core/tests/unit_tests/fake/test_fake_chat_model.py @@ -28,17 +28,11 @@ def test_generic_fake_chat_model_invoke() -> None: infinite_cycle = cycle([AIMessage(content="hello"), AIMessage(content="goodbye")]) model = GenericFakeChatModel(messages=infinite_cycle) response = model.invoke("meow") - assert response == _any_id_ai_message( - content="hello", additional_kwargs={"output_version": "v0"} - ) + assert response == _any_id_ai_message(content="hello", additional_kwargs={}) response = model.invoke("kitty") - assert response == _any_id_ai_message( - content="goodbye", additional_kwargs={"output_version": "v0"} - ) + assert response == _any_id_ai_message(content="goodbye", additional_kwargs={}) response = model.invoke("meow") - assert response == _any_id_ai_message( - content="hello", additional_kwargs={"output_version": "v0"} - ) + assert response == _any_id_ai_message(content="hello", additional_kwargs={}) async def test_generic_fake_chat_model_ainvoke() -> None: @@ -46,17 +40,11 @@ async def test_generic_fake_chat_model_ainvoke() -> None: infinite_cycle = cycle([AIMessage(content="hello"), AIMessage(content="goodbye")]) model = GenericFakeChatModel(messages=infinite_cycle) response = await 
model.ainvoke("meow") - assert response == _any_id_ai_message( - content="hello", additional_kwargs={"output_version": "v0"} - ) + assert response == _any_id_ai_message(content="hello", additional_kwargs={}) response = await model.ainvoke("kitty") - assert response == _any_id_ai_message( - content="goodbye", additional_kwargs={"output_version": "v0"} - ) + assert response == _any_id_ai_message(content="goodbye", additional_kwargs={}) response = await model.ainvoke("meow") - assert response == _any_id_ai_message( - content="hello", additional_kwargs={"output_version": "v0"} - ) + assert response == _any_id_ai_message(content="hello", additional_kwargs={}) async def test_generic_fake_chat_model_stream() -> None: @@ -69,32 +57,24 @@ async def test_generic_fake_chat_model_stream() -> None: model = GenericFakeChatModel(messages=infinite_cycle) chunks = [chunk async for chunk in model.astream("meow")] assert chunks == [ - _any_id_ai_message_chunk( - content="hello", additional_kwargs={"output_version": "v0"} - ), - _any_id_ai_message_chunk( - content=" ", additional_kwargs={"output_version": "v0"} - ), + _any_id_ai_message_chunk(content="hello", additional_kwargs={}), + _any_id_ai_message_chunk(content=" ", additional_kwargs={}), _any_id_ai_message_chunk( content="goodbye", chunk_position="last", - additional_kwargs={"output_version": "v0"}, + additional_kwargs={}, ), ] assert len({chunk.id for chunk in chunks}) == 1 chunks = list(model.stream("meow")) assert chunks == [ - _any_id_ai_message_chunk( - content="hello", additional_kwargs={"output_version": "v0"} - ), - _any_id_ai_message_chunk( - content=" ", additional_kwargs={"output_version": "v0"} - ), + _any_id_ai_message_chunk(content="hello", additional_kwargs={}), + _any_id_ai_message_chunk(content=" ", additional_kwargs={}), _any_id_ai_message_chunk( content="goodbye", chunk_position="last", - additional_kwargs={"output_version": "v0"}, + additional_kwargs={}, ), ] assert len({chunk.id for chunk in chunks}) == 1 @@ -192,16 +172,12 @@ async def test_generic_fake_chat_model_astream_log() -> None: ] final = log_patches[-1] assert final.state["streamed_output"] == [ - _any_id_ai_message_chunk( - content="hello", additional_kwargs={"output_version": "v0"} - ), - _any_id_ai_message_chunk( - content=" ", additional_kwargs={"output_version": "v0"} - ), + _any_id_ai_message_chunk(content="hello", additional_kwargs={}), + _any_id_ai_message_chunk(content=" ", additional_kwargs={}), _any_id_ai_message_chunk( content="goodbye", chunk_position="last", - additional_kwargs={"output_version": "v0"}, + additional_kwargs={}, ), ] assert len({chunk.id for chunk in final.state["streamed_output"]}) == 1 @@ -257,16 +233,12 @@ async def on_llm_new_token( ) ] assert results == [ - _any_id_ai_message_chunk( - content="hello", additional_kwargs={"output_version": "v0"} - ), - _any_id_ai_message_chunk( - content=" ", additional_kwargs={"output_version": "v0"} - ), + _any_id_ai_message_chunk(content="hello", additional_kwargs={}), + _any_id_ai_message_chunk(content=" ", additional_kwargs={}), _any_id_ai_message_chunk( content="goodbye", chunk_position="last", - additional_kwargs={"output_version": "v0"}, + additional_kwargs={}, ), ] assert tokens == ["hello", " ", "goodbye"] @@ -280,18 +252,18 @@ def test_chat_model_inputs() -> None: content="hello" ) assert fake.invoke([("ai", "blah")]) == _any_id_ai_message( - content="blah", additional_kwargs={"output_version": "v0"} + content="blah", additional_kwargs={} ) assert fake.invoke([AIMessage(content="blah")]) == 
_any_id_ai_message( - content="blah", additional_kwargs={"output_version": "v0"} + content="blah", additional_kwargs={} ) def test_fake_list_chat_model_batch() -> None: expected = [ - _any_id_ai_message(content="a", additional_kwargs={"output_version": "v0"}), - _any_id_ai_message(content="b", additional_kwargs={"output_version": "v0"}), - _any_id_ai_message(content="c", additional_kwargs={"output_version": "v0"}), + _any_id_ai_message(content="a", additional_kwargs={}), + _any_id_ai_message(content="b", additional_kwargs={}), + _any_id_ai_message(content="c", additional_kwargs={}), ] for _ in range(20): # run this 20 times to test race condition in batch diff --git a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py index 5d1e775b81722..66629dc012e68 100644 --- a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py +++ b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py @@ -208,14 +208,10 @@ def _llm_type(self) -> str: # is not strictly correct. # LangChain documents a pattern of adding BaseMessageChunks to accumulate a stream. # This may be better done with `reduce(operator.add, chunks)`. - assert chunks == [ - _any_id_ai_message(content="hello", additional_kwargs={"output_version": "v0"}) - ] + assert chunks == [_any_id_ai_message(content="hello", additional_kwargs={})] chunks = [chunk async for chunk in model.astream("anything")] - assert chunks == [ - _any_id_ai_message(content="hello", additional_kwargs={"output_version": "v0"}) - ] + assert chunks == [_any_id_ai_message(content="hello", additional_kwargs={})] async def test_astream_implementation_fallback_to_stream() -> None: @@ -257,26 +253,22 @@ def _llm_type(self) -> str: model = ModelWithSyncStream() chunks = list(model.stream("anything")) assert chunks == [ - _any_id_ai_message_chunk( - content="a", additional_kwargs={"output_version": "v0"} - ), + _any_id_ai_message_chunk(content="a", additional_kwargs={}), _any_id_ai_message_chunk( content="b", chunk_position="last", - additional_kwargs={"output_version": "v0"}, + additional_kwargs={}, ), ] assert len({chunk.id for chunk in chunks}) == 1 assert type(model)._astream == BaseChatModel._astream astream_chunks = [chunk async for chunk in model.astream("anything")] assert astream_chunks == [ - _any_id_ai_message_chunk( - content="a", additional_kwargs={"output_version": "v0"} - ), + _any_id_ai_message_chunk(content="a", additional_kwargs={}), _any_id_ai_message_chunk( content="b", chunk_position="last", - additional_kwargs={"output_version": "v0"}, + additional_kwargs={}, ), ] assert len({chunk.id for chunk in astream_chunks}) == 1 @@ -321,13 +313,11 @@ def _llm_type(self) -> str: model = ModelWithAsyncStream() chunks = [chunk async for chunk in model.astream("anything")] assert chunks == [ - _any_id_ai_message_chunk( - content="a", additional_kwargs={"output_version": "v0"} - ), + _any_id_ai_message_chunk(content="a", additional_kwargs={}), _any_id_ai_message_chunk( content="b", chunk_position="last", - additional_kwargs={"output_version": "v0"}, + additional_kwargs={}, ), ] assert len({chunk.id for chunk in chunks}) == 1 diff --git a/libs/core/tests/unit_tests/runnables/test_runnable.py b/libs/core/tests/unit_tests/runnables/test_runnable.py index 93b8b206dcada..f55efc2bb6fda 100644 --- a/libs/core/tests/unit_tests/runnables/test_runnable.py +++ b/libs/core/tests/unit_tests/runnables/test_runnable.py @@ -1865,7 +1865,7 @@ def test_prompt_with_chat_model( 
_any_id_ai_message_chunk( content="o", chunk_position="last", - additional_kwargs={"output_version": "v0"}, + additional_kwargs={}, ), ] assert prompt_spy.call_args.args[1] == {"question": "What is your name?"} @@ -1978,7 +1978,7 @@ async def test_prompt_with_chat_model_async( _any_id_ai_message_chunk( content="o", chunk_position="last", - additional_kwargs={"output_version": "v0"}, + additional_kwargs={}, ), ] assert prompt_spy.call_args.args[1] == {"question": "What is your name?"} diff --git a/libs/core/tests/unit_tests/runnables/test_runnable_events_v1.py b/libs/core/tests/unit_tests/runnables/test_runnable_events_v1.py index fc83ecbfa8af1..e19819685bcdf 100644 --- a/libs/core/tests/unit_tests/runnables/test_runnable_events_v1.py +++ b/libs/core/tests/unit_tests/runnables/test_runnable_events_v1.py @@ -1888,7 +1888,7 @@ async def test_events_astream_config() -> None: model_02 = model.with_config({"configurable": {"messages": good_world_on_repeat}}) assert model_02.invoke("hello") == AIMessage( - content="Goodbye world", additional_kwargs={"output_version": "v0"}, id="ai2" + content="Goodbye world", additional_kwargs={}, id="ai2" ) events = await _collect_events(model_02.astream_events("hello", version="v1")) @@ -2015,9 +2015,7 @@ def get_by_session_id(session_id: str) -> BaseChatMessageHistory: assert store == { "session-123": [ HumanMessage(content="hello"), - AIMessage( - content="hello", additional_kwargs={"output_version": "v0"}, id="ai3" - ), + AIMessage(content="hello", id="ai3"), ] } diff --git a/libs/core/tests/unit_tests/runnables/test_runnable_events_v2.py b/libs/core/tests/unit_tests/runnables/test_runnable_events_v2.py index 784c1548b788a..de65d9ff905e2 100644 --- a/libs/core/tests/unit_tests/runnables/test_runnable_events_v2.py +++ b/libs/core/tests/unit_tests/runnables/test_runnable_events_v2.py @@ -1840,7 +1840,7 @@ async def test_events_astream_config() -> None: model_02 = model.with_config({"configurable": {"messages": good_world_on_repeat}}) assert model_02.invoke("hello") == AIMessage( - content="Goodbye world", additional_kwargs={"output_version": "v0"}, id="ai2" + content="Goodbye world", additional_kwargs={}, id="ai2" ) events = await _collect_events(model_02.astream_events("hello", version="v2")) @@ -1986,9 +1986,7 @@ def _get_output_messages(*args: Any, **kwargs: Any) -> Any: assert store == { "session-123": [ HumanMessage(content="hello"), - AIMessage( - content="hello", additional_kwargs={"output_version": "v0"}, id="ai3" - ), + AIMessage(content="hello", id="ai3"), ] } diff --git a/libs/core/tests/unit_tests/stubs.py b/libs/core/tests/unit_tests/stubs.py index 332e8b9fe8723..bcdf60a6d3ca1 100644 --- a/libs/core/tests/unit_tests/stubs.py +++ b/libs/core/tests/unit_tests/stubs.py @@ -29,18 +29,7 @@ def _any_id_document(**kwargs: Any) -> Document: def _any_id_ai_message(**kwargs: Any) -> AIMessage: """Create ai message with an any id field.""" - # Set default additional_kwargs to include output_version if not provided - if "additional_kwargs" not in kwargs: - kwargs["additional_kwargs"] = {"output_version": "v0"} - elif ( - isinstance(kwargs["additional_kwargs"], dict) - and "output_version" not in kwargs["additional_kwargs"] - ): - kwargs["additional_kwargs"] = { - **kwargs["additional_kwargs"], - "output_version": "v0", - } - + # Don't automatically add output_version - it should only be present when explicit message = AIMessage(**kwargs) message.id = AnyStr() return message @@ -48,28 +37,7 @@ def _any_id_ai_message(**kwargs: Any) -> AIMessage: def 
_any_id_ai_message_chunk(**kwargs: Any) -> AIMessageChunk: """Create ai message with an any id field.""" - # Only exclude output_version from last chunks that have empty content - # (synthetic chunks) - is_empty_last_chunk = ( - kwargs.get("chunk_position") == "last" - and not kwargs.get("content") - and "additional_kwargs" not in kwargs - ) - - # Set default additional_kwargs to include output_version if not provided and not - # an empty last chunk - if not is_empty_last_chunk: - if "additional_kwargs" not in kwargs: - kwargs["additional_kwargs"] = {"output_version": "v0"} - elif ( - isinstance(kwargs["additional_kwargs"], dict) - and "output_version" not in kwargs["additional_kwargs"] - ): - kwargs["additional_kwargs"] = { - **kwargs["additional_kwargs"], - "output_version": "v0", - } - + # Don't automatically add output_version - it should only be present when explicit message = AIMessageChunk(**kwargs) message.id = AnyStr() return message From 720d08e932957423b79e59a72202168514a6ae7c Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Tue, 26 Aug 2025 14:24:32 -0400 Subject: [PATCH 58/73] . --- .../language_models/fake_chat_models.py | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/libs/core/langchain_core/language_models/fake_chat_models.py b/libs/core/langchain_core/language_models/fake_chat_models.py index 17306dd17d438..fc072866aeac3 100644 --- a/libs/core/langchain_core/language_models/fake_chat_models.py +++ b/libs/core/langchain_core/language_models/fake_chat_models.py @@ -322,7 +322,8 @@ def _stream( id=message.id, content="", additional_kwargs={ - "function_call": {fkey: fvalue_chunk} + "function_call": {fkey: fvalue_chunk}, + "output_version": "v0", }, ) ) @@ -337,7 +338,10 @@ def _stream( message=AIMessageChunk( id=message.id, content="", - additional_kwargs={"function_call": {fkey: fvalue}}, + additional_kwargs={ + "function_call": {fkey: fvalue}, + "output_version": "v0", + }, ) ) if run_manager: @@ -349,7 +353,9 @@ def _stream( else: chunk = ChatGenerationChunk( message=AIMessageChunk( - id=message.id, content="", additional_kwargs={key: value} + id=message.id, + content="", + additional_kwargs={key: value, "output_version": "v0"}, ) ) if run_manager: @@ -359,6 +365,14 @@ def _stream( ) yield chunk + # Add a final chunk with chunk_position="last" after all additional_kwargs + final_chunk = ChatGenerationChunk( + message=AIMessageChunk(id=message.id, content="", chunk_position="last") + ) + if run_manager: + run_manager.on_llm_new_token("", chunk=final_chunk) + yield final_chunk + @property def _llm_type(self) -> str: return "generic-fake-chat-model" From 620779f61bd0a4b47e42487e62be94878dbc9207 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Tue, 26 Aug 2025 14:38:09 -0400 Subject: [PATCH 59/73] . --- .../language_models/chat_models.py | 36 ++++++++++--------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py index 9f7997cef9eb4..379e7d04be56e 100644 --- a/libs/core/langchain_core/language_models/chat_models.py +++ b/libs/core/langchain_core/language_models/chat_models.py @@ -422,18 +422,19 @@ def invoke( output_version: Optional[str] = None, **kwargs: Any, ) -> AIMessage: - """Invoke the chat model. + """Invoke the model. Args: - input: The input to the chat model. - config: The config to use for this run. - stop: Stop words to use when generating. + input: The model input. 
+ config: The config to use for this model run. + stop: Stop words to use during generation. output_version: Override the model's ``output_version`` for this invocation. If None, uses the model's configured ``output_version``. **kwargs: Additional keyword arguments. Returns: The model's response message. + """ config = ensure_config(config) @@ -469,18 +470,19 @@ async def ainvoke( output_version: Optional[str] = None, **kwargs: Any, ) -> AIMessage: - """Asynchronously invoke the chat model. + """Asynchronously invoke the model. Args: - input: The input to the chat model. - config: The config to use for this run. - stop: Stop words to use when generating. + input: The model input. + config: The config to use for this model run. + stop: Stop words to use during generation. output_version: Override the model's ``output_version`` for this invocation. If None, uses the model's configured ``output_version``. **kwargs: Additional keyword arguments. Returns: The model's response message. + """ config = ensure_config(config) @@ -551,15 +553,16 @@ def stream( """Stream responses from the chat model. Args: - input: The input to the chat model. - config: The config to use for this run. - stop: Stop words to use when generating. + input: The model input. + config: The config to use for this model run. + stop: Stop words to use during generation. output_version: Override the model's ``output_version`` for this invocation. If None, uses the model's configured ``output_version``. **kwargs: Additional keyword arguments. Returns: Iterator of message chunks. + """ effective_output_version = ( output_version if output_version is not None else self.output_version @@ -704,18 +707,19 @@ async def astream( output_version: Optional[str] = None, **kwargs: Any, ) -> AsyncIterator[AIMessageChunk]: - """Asynchronously stream responses from the chat model. + """Asynchronously stream responses from the model. Args: - input: The input to the chat model. - config: The config to use for this run. - stop: Stop words to use when generating. + input: The model input. + config: The config to use for this model run. + stop: Stop words to use during generation. output_version: Override the model's ``output_version`` for this invocation. If None, uses the model's configured ``output_version``. **kwargs: Additional keyword arguments. Returns: - Async iterator of message chunks. + Async Iterator of message chunks. + """ effective_output_version = ( output_version if output_version is not None else self.output_version From dc5ac6695c4619012229f231344ba286b15115d3 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Tue, 26 Aug 2025 14:43:31 -0400 Subject: [PATCH 60/73] . 
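Drop the now-redundant ``additional_kwargs={...}`` expectations from the fake-chat-model, chat-model base, and few-shot prompt tests: with the stub helpers no longer injecting ``"output_version"``, plain messages compare equal. Roughly, the updated expectation style looks like this (illustrative only; assumes ``GenericFakeChatModel`` as exported from ``langchain_core.language_models``, with the real assertions living in tests/unit_tests/fake/test_fake_chat_model.py):

.. code-block:: python

    from itertools import cycle

    from langchain_core.language_models import GenericFakeChatModel
    from langchain_core.messages import AIMessage

    model = GenericFakeChatModel(messages=cycle([AIMessage(content="hello")]))
    response = model.invoke("meow")
    # No default "output_version" key is added to additional_kwargs anymore.
    assert response.content == "hello"
    assert "output_version" not in response.additional_kwargs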
--- .../unit_tests/fake/test_fake_chat_model.py | 44 ++++++++----------- .../language_models/chat_models/test_base.py | 13 +++--- .../tests/unit_tests/prompts/test_few_shot.py | 36 +++++++-------- 3 files changed, 42 insertions(+), 51 deletions(-) diff --git a/libs/core/tests/unit_tests/fake/test_fake_chat_model.py b/libs/core/tests/unit_tests/fake/test_fake_chat_model.py index 6d43332904673..72242aee2518b 100644 --- a/libs/core/tests/unit_tests/fake/test_fake_chat_model.py +++ b/libs/core/tests/unit_tests/fake/test_fake_chat_model.py @@ -28,11 +28,11 @@ def test_generic_fake_chat_model_invoke() -> None: infinite_cycle = cycle([AIMessage(content="hello"), AIMessage(content="goodbye")]) model = GenericFakeChatModel(messages=infinite_cycle) response = model.invoke("meow") - assert response == _any_id_ai_message(content="hello", additional_kwargs={}) + assert response == _any_id_ai_message(content="hello") response = model.invoke("kitty") - assert response == _any_id_ai_message(content="goodbye", additional_kwargs={}) + assert response == _any_id_ai_message(content="goodbye") response = model.invoke("meow") - assert response == _any_id_ai_message(content="hello", additional_kwargs={}) + assert response == _any_id_ai_message(content="hello") async def test_generic_fake_chat_model_ainvoke() -> None: @@ -40,11 +40,11 @@ async def test_generic_fake_chat_model_ainvoke() -> None: infinite_cycle = cycle([AIMessage(content="hello"), AIMessage(content="goodbye")]) model = GenericFakeChatModel(messages=infinite_cycle) response = await model.ainvoke("meow") - assert response == _any_id_ai_message(content="hello", additional_kwargs={}) + assert response == _any_id_ai_message(content="hello") response = await model.ainvoke("kitty") - assert response == _any_id_ai_message(content="goodbye", additional_kwargs={}) + assert response == _any_id_ai_message(content="goodbye") response = await model.ainvoke("meow") - assert response == _any_id_ai_message(content="hello", additional_kwargs={}) + assert response == _any_id_ai_message(content="hello") async def test_generic_fake_chat_model_stream() -> None: @@ -57,24 +57,22 @@ async def test_generic_fake_chat_model_stream() -> None: model = GenericFakeChatModel(messages=infinite_cycle) chunks = [chunk async for chunk in model.astream("meow")] assert chunks == [ - _any_id_ai_message_chunk(content="hello", additional_kwargs={}), - _any_id_ai_message_chunk(content=" ", additional_kwargs={}), + _any_id_ai_message_chunk(content="hello"), + _any_id_ai_message_chunk(content=" "), _any_id_ai_message_chunk( content="goodbye", chunk_position="last", - additional_kwargs={}, ), ] assert len({chunk.id for chunk in chunks}) == 1 chunks = list(model.stream("meow")) assert chunks == [ - _any_id_ai_message_chunk(content="hello", additional_kwargs={}), - _any_id_ai_message_chunk(content=" ", additional_kwargs={}), + _any_id_ai_message_chunk(content="hello"), + _any_id_ai_message_chunk(content=" "), _any_id_ai_message_chunk( content="goodbye", chunk_position="last", - additional_kwargs={}, ), ] assert len({chunk.id for chunk in chunks}) == 1 @@ -172,12 +170,11 @@ async def test_generic_fake_chat_model_astream_log() -> None: ] final = log_patches[-1] assert final.state["streamed_output"] == [ - _any_id_ai_message_chunk(content="hello", additional_kwargs={}), - _any_id_ai_message_chunk(content=" ", additional_kwargs={}), + _any_id_ai_message_chunk(content="hello"), + _any_id_ai_message_chunk(content=" "), _any_id_ai_message_chunk( content="goodbye", chunk_position="last", - 
additional_kwargs={}, ), ] assert len({chunk.id for chunk in final.state["streamed_output"]}) == 1 @@ -233,12 +230,11 @@ async def on_llm_new_token( ) ] assert results == [ - _any_id_ai_message_chunk(content="hello", additional_kwargs={}), - _any_id_ai_message_chunk(content=" ", additional_kwargs={}), + _any_id_ai_message_chunk(content="hello"), + _any_id_ai_message_chunk(content=" "), _any_id_ai_message_chunk( content="goodbye", chunk_position="last", - additional_kwargs={}, ), ] assert tokens == ["hello", " ", "goodbye"] @@ -251,19 +247,17 @@ def test_chat_model_inputs() -> None: assert cast("HumanMessage", fake.invoke("hello")) == _any_id_human_message( content="hello" ) - assert fake.invoke([("ai", "blah")]) == _any_id_ai_message( - content="blah", additional_kwargs={} - ) + assert fake.invoke([("ai", "blah")]) == _any_id_ai_message(content="blah") assert fake.invoke([AIMessage(content="blah")]) == _any_id_ai_message( - content="blah", additional_kwargs={} + content="blah" ) def test_fake_list_chat_model_batch() -> None: expected = [ - _any_id_ai_message(content="a", additional_kwargs={}), - _any_id_ai_message(content="b", additional_kwargs={}), - _any_id_ai_message(content="c", additional_kwargs={}), + _any_id_ai_message(content="a"), + _any_id_ai_message(content="b"), + _any_id_ai_message(content="c"), ] for _ in range(20): # run this 20 times to test race condition in batch diff --git a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py index 66629dc012e68..bf6bfe526105c 100644 --- a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py +++ b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py @@ -208,10 +208,10 @@ def _llm_type(self) -> str: # is not strictly correct. # LangChain documents a pattern of adding BaseMessageChunks to accumulate a stream. # This may be better done with `reduce(operator.add, chunks)`. 
- assert chunks == [_any_id_ai_message(content="hello", additional_kwargs={})] + assert chunks == [_any_id_ai_message(content="hello")] chunks = [chunk async for chunk in model.astream("anything")] - assert chunks == [_any_id_ai_message(content="hello", additional_kwargs={})] + assert chunks == [_any_id_ai_message(content="hello")] async def test_astream_implementation_fallback_to_stream() -> None: @@ -253,22 +253,20 @@ def _llm_type(self) -> str: model = ModelWithSyncStream() chunks = list(model.stream("anything")) assert chunks == [ - _any_id_ai_message_chunk(content="a", additional_kwargs={}), + _any_id_ai_message_chunk(content="a"), _any_id_ai_message_chunk( content="b", chunk_position="last", - additional_kwargs={}, ), ] assert len({chunk.id for chunk in chunks}) == 1 assert type(model)._astream == BaseChatModel._astream astream_chunks = [chunk async for chunk in model.astream("anything")] assert astream_chunks == [ - _any_id_ai_message_chunk(content="a", additional_kwargs={}), + _any_id_ai_message_chunk(content="a"), _any_id_ai_message_chunk( content="b", chunk_position="last", - additional_kwargs={}, ), ] assert len({chunk.id for chunk in astream_chunks}) == 1 @@ -313,11 +311,10 @@ def _llm_type(self) -> str: model = ModelWithAsyncStream() chunks = [chunk async for chunk in model.astream("anything")] assert chunks == [ - _any_id_ai_message_chunk(content="a", additional_kwargs={}), + _any_id_ai_message_chunk(content="a"), _any_id_ai_message_chunk( content="b", chunk_position="last", - additional_kwargs={}, ), ] assert len({chunk.id for chunk in chunks}) == 1 diff --git a/libs/core/tests/unit_tests/prompts/test_few_shot.py b/libs/core/tests/unit_tests/prompts/test_few_shot.py index 3e768cf380b7e..7706898ec42ab 100644 --- a/libs/core/tests/unit_tests/prompts/test_few_shot.py +++ b/libs/core/tests/unit_tests/prompts/test_few_shot.py @@ -357,12 +357,12 @@ async def test_few_shot_chat_message_prompt_template() -> None: ) expected = [ - SystemMessage(content="You are a helpful AI Assistant", additional_kwargs={}), - HumanMessage(content="2+2", additional_kwargs={}, example=False), - AIMessage(content="4", additional_kwargs={}, example=False), - HumanMessage(content="2+3", additional_kwargs={}, example=False), - AIMessage(content="5", additional_kwargs={}, example=False), - HumanMessage(content="100 + 1", additional_kwargs={}, example=False), + SystemMessage(content="You are a helpful AI Assistant"), + HumanMessage(content="2+2", example=False), + AIMessage(content="4", example=False), + HumanMessage(content="2+3", example=False), + AIMessage(content="5", example=False), + HumanMessage(content="100 + 1", example=False), ] messages = final_prompt.format_messages(input="100 + 1") @@ -432,12 +432,12 @@ def test_few_shot_chat_message_prompt_template_with_selector() -> None: + HumanMessagePromptTemplate.from_template("{input}") ) expected = [ - SystemMessage(content="You are a helpful AI Assistant", additional_kwargs={}), - HumanMessage(content="2+2", additional_kwargs={}, example=False), - AIMessage(content="4", additional_kwargs={}, example=False), - HumanMessage(content="2+3", additional_kwargs={}, example=False), - AIMessage(content="5", additional_kwargs={}, example=False), - HumanMessage(content="100 + 1", additional_kwargs={}, example=False), + SystemMessage(content="You are a helpful AI Assistant"), + HumanMessage(content="2+2", example=False), + AIMessage(content="4", example=False), + HumanMessage(content="2+3", example=False), + AIMessage(content="5", example=False), + 
HumanMessage(content="100 + 1", example=False), ] messages = final_prompt.format_messages(input="100 + 1") assert messages == expected @@ -531,12 +531,12 @@ async def test_few_shot_chat_message_prompt_template_with_selector_async() -> No + HumanMessagePromptTemplate.from_template("{input}") ) expected = [ - SystemMessage(content="You are a helpful AI Assistant", additional_kwargs={}), - HumanMessage(content="2+2", additional_kwargs={}, example=False), - AIMessage(content="4", additional_kwargs={}, example=False), - HumanMessage(content="2+3", additional_kwargs={}, example=False), - AIMessage(content="5", additional_kwargs={}, example=False), - HumanMessage(content="100 + 1", additional_kwargs={}, example=False), + SystemMessage(content="You are a helpful AI Assistant"), + HumanMessage(content="2+2", example=False), + AIMessage(content="4", example=False), + HumanMessage(content="2+3", example=False), + AIMessage(content="5", example=False), + HumanMessage(content="100 + 1", example=False), ] messages = await final_prompt.aformat_messages(input="100 + 1") assert messages == expected From 2bbd034f8528f22dac1af54cac34191071034004 Mon Sep 17 00:00:00 2001 From: ccurme Date: Tue, 26 Aug 2025 15:58:43 -0300 Subject: [PATCH 61/73] fix(core): (v1) invoke callback prior to yielding final chunk (#32695) --- .../language_models/chat_models.py | 54 +++++++++++-------- libs/core/langchain_core/messages/ai.py | 2 +- .../chat_models/test_base_standard.py | 3 +- .../chat_models/test_responses_standard.py | 2 +- .../tests/unit_tests/chat_models/test_base.py | 37 +++++++++---- .../integration_tests/chat_models.py | 2 + 6 files changed, 64 insertions(+), 36 deletions(-) diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py index 365fc4a1ba33d..675804808b22e 100644 --- a/libs/core/langchain_core/language_models/chat_models.py +++ b/libs/core/langchain_core/language_models/chat_models.py @@ -583,9 +583,13 @@ def stream( empty_content: Union[str, list] = ( "" if isinstance(chunk.message.content, str) else [] ) - yield AIMessageChunk( + msg_chunk = AIMessageChunk( content=empty_content, chunk_position="last", id=run_id ) + run_manager.on_llm_new_token( + "", chunk=ChatGenerationChunk(message=msg_chunk) + ) + yield msg_chunk except BaseException as e: generations_with_error_metadata = _generate_response_from_error(e) chat_generation_chunk = merge_chat_generation_chunks(chunks) @@ -700,9 +704,13 @@ async def astream( empty_content: Union[str, list] = ( "" if isinstance(chunk.message.content, str) else [] ) - yield AIMessageChunk( + msg_chunk = AIMessageChunk( content=empty_content, chunk_position="last", id=run_id ) + await run_manager.on_llm_new_token( + "", chunk=ChatGenerationChunk(message=msg_chunk) + ) + yield msg_chunk except BaseException as e: generations_with_error_metadata = _generate_response_from_error(e) chat_generation_chunk = merge_chat_generation_chunks(chunks) @@ -1157,14 +1165,14 @@ def _generate_with_cache( yielded = False for chunk in self._stream(messages, stop=stop, **kwargs): chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk) + if self.output_version == "v1": + # Overwrite .content with .content_blocks + chunk.message = _update_message_content_to_blocks( + chunk.message, "v1" + ) if run_manager: if chunk.message.id is None: chunk.message.id = run_id - if self.output_version == "v1": - # Overwrite .content with .content_blocks - chunk.message = _update_message_content_to_blocks( - chunk.message, "v1" - ) 
run_manager.on_llm_new_token( cast("str", chunk.message.content), chunk=chunk ) @@ -1180,13 +1188,14 @@ def _generate_with_cache( empty_content: Union[str, list] = ( "" if isinstance(chunk.message.content, str) else [] ) - chunks.append( - ChatGenerationChunk( - message=AIMessageChunk( - content=empty_content, chunk_position="last", id=run_id - ) + chunk = ChatGenerationChunk( + message=AIMessageChunk( + content=empty_content, chunk_position="last", id=run_id ) ) + if run_manager: + run_manager.on_llm_new_token("", chunk=chunk) + chunks.append(chunk) result = generate_from_stream(iter(chunks)) elif inspect.signature(self._generate).parameters.get("run_manager"): result = self._generate( @@ -1264,14 +1273,14 @@ async def _agenerate_with_cache( yielded = False async for chunk in self._astream(messages, stop=stop, **kwargs): chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk) + if self.output_version == "v1": + # Overwrite .content with .content_blocks + chunk.message = _update_message_content_to_blocks( + chunk.message, "v1" + ) if run_manager: if chunk.message.id is None: chunk.message.id = run_id - if self.output_version == "v1": - # Overwrite .content with .content_blocks - chunk.message = _update_message_content_to_blocks( - chunk.message, "v1" - ) await run_manager.on_llm_new_token( cast("str", chunk.message.content), chunk=chunk ) @@ -1287,13 +1296,14 @@ async def _agenerate_with_cache( empty_content: Union[str, list] = ( "" if isinstance(chunk.message.content, str) else [] ) - chunks.append( - ChatGenerationChunk( - message=AIMessageChunk( - content=empty_content, chunk_position="last", id=run_id - ) + chunk = ChatGenerationChunk( + message=AIMessageChunk( + content=empty_content, chunk_position="last", id=run_id ) ) + if run_manager: + await run_manager.on_llm_new_token("", chunk=chunk) + chunks.append(chunk) result = generate_from_stream(iter(chunks)) elif inspect.signature(self._agenerate).parameters.get("run_manager"): result = await self._agenerate( diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index d6ab74e2df97b..221290424e1a4 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -232,7 +232,7 @@ def content_blocks(self) -> list[types.ContentBlock]: translator = get_translator(model_provider) if translator: try: - return translator["translate_content_chunk"](self) + return translator["translate_content"](self) except NotImplementedError: pass diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py b/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py index b51efff1cebf5..d96b3e69d24de 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py @@ -98,9 +98,8 @@ def test_openai_pdf_inputs(self, model: BaseChatModel) -> None: {"type": "text", "text": "Summarize this document:"}, { "type": "file", - "source_type": "base64", "mime_type": "application/pdf", - "data": pdf_data, + "base64": pdf_data, "filename": "my-pdf", # OpenAI requires a filename }, ] diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_standard.py b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_standard.py index d06ed1e40f574..4b6430e4eadc9 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_standard.py +++ 
b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_standard.py @@ -20,7 +20,7 @@ def chat_model_class(self) -> type[BaseChatModel]: @property def chat_model_params(self) -> dict: - return {"model": "gpt-4o-mini", "use_responses_api": True} + return {"model": "gpt-4o-mini", "output_version": "responses/v1"} @pytest.mark.xfail(reason="Unsupported.") def test_stop_sequence(self, model: BaseChatModel) -> None: diff --git a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py index a63f89647869a..e4fe852588ef6 100644 --- a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py +++ b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py @@ -735,17 +735,22 @@ def test_format_message_content() -> None: assert [{"type": "text", "text": "hello"}] == _format_message_content(content) # Standard multi-modal inputs - content = [{"type": "image", "source_type": "url", "url": "https://..."}] + contents = [ + {"type": "image", "source_type": "url", "url": "https://..."}, # v0 + {"type": "image", "url": "https://..."}, # v1 + ] expected = [{"type": "image_url", "image_url": {"url": "https://..."}}] - assert expected == _format_message_content(content) + for content in contents: + assert expected == _format_message_content([content]) - content = [ + contents = [ { "type": "image", "source_type": "base64", "data": "", "mime_type": "image/png", - } + }, + {"type": "image", "base64": "", "mime_type": "image/png"}, ] expected = [ { @@ -753,16 +758,23 @@ def test_format_message_content() -> None: "image_url": {"url": "data:image/png;base64,"}, } ] - assert expected == _format_message_content(content) + for content in contents: + assert expected == _format_message_content([content]) - content = [ + contents = [ { "type": "file", "source_type": "base64", "data": "", "mime_type": "application/pdf", "filename": "my_file", - } + }, + { + "type": "file", + "base64": "", + "mime_type": "application/pdf", + "filename": "my_file", + }, ] expected = [ { @@ -773,11 +785,16 @@ def test_format_message_content() -> None: }, } ] - assert expected == _format_message_content(content) + for content in contents: + assert expected == _format_message_content([content]) - content = [{"type": "file", "source_type": "id", "id": "file-abc123"}] + contents = [ + {"type": "file", "source_type": "id", "id": "file-abc123"}, + {"type": "file", "file_id": "file-abc123"}, + ] expected = [{"type": "file", "file": {"file_id": "file-abc123"}}] - assert expected == _format_message_content(content) + for content in contents: + assert expected == _format_message_content([content]) class GenerateUsername(BaseModel): diff --git a/libs/standard-tests/langchain_tests/integration_tests/chat_models.py b/libs/standard-tests/langchain_tests/integration_tests/chat_models.py index f8024195e3990..c36f7b27ab246 100644 --- a/libs/standard-tests/langchain_tests/integration_tests/chat_models.py +++ b/libs/standard-tests/langchain_tests/integration_tests/chat_models.py @@ -2905,6 +2905,8 @@ def get_weather(location: str) -> str: input_message = HumanMessage("What is the weather in San Francisco, CA?") tool_call_message = llm_with_tools.invoke([input_message]) assert isinstance(tool_call_message, AIMessage) + content_blocks = tool_call_message.content_blocks + assert any(block["type"] == "tool_call" for block in content_blocks) tool_calls = tool_call_message.tool_calls assert len(tool_calls) == 1 tool_call = tool_calls[0] From 
447db134736b7d8bf16260b8e8cd5b61d40c5af8 Mon Sep 17 00:00:00 2001 From: ccurme Date: Tue, 26 Aug 2025 17:33:59 -0300 Subject: [PATCH 62/73] feat(openai): (v1) support `content_blocks` on legacy v0 responses API format (#32700) --- .../messages/block_translators/openai.py | 144 +++++++++++++++++ .../messages/block_translators/test_openai.py | 149 ++++++++++++++++++ .../langchain_openai/chat_models/_compat.py | 114 -------------- .../langchain_openai/chat_models/base.py | 4 +- .../tests/unit_tests/chat_models/test_base.py | 4 +- 5 files changed, 299 insertions(+), 116 deletions(-) diff --git a/libs/core/langchain_core/messages/block_translators/openai.py b/libs/core/langchain_core/messages/block_translators/openai.py index 10bf18c0a6fbf..e5802b61bc3c9 100644 --- a/libs/core/langchain_core/messages/block_translators/openai.py +++ b/libs/core/langchain_core/messages/block_translators/openai.py @@ -2,6 +2,7 @@ from __future__ import annotations +import json from collections.abc import Iterable from typing import TYPE_CHECKING, Any, Optional, Union, cast @@ -146,6 +147,147 @@ def _convert_from_v1_to_chat_completions(message: AIMessage) -> AIMessage: return message +# Responses +_FUNCTION_CALL_IDS_MAP_KEY = "__openai_function_call_ids__" + + +def _convert_from_v03_ai_message(message: AIMessage) -> AIMessage: + """Convert v0 AIMessage into ``output_version="responses/v1"`` format.""" + from langchain_core.messages import AIMessageChunk + + # Only update ChatOpenAI v0.3 AIMessages + is_chatopenai_v03 = ( + isinstance(message.content, list) + and all(isinstance(b, dict) for b in message.content) + ) and ( + any( + item in message.additional_kwargs + for item in [ + "reasoning", + "tool_outputs", + "refusal", + _FUNCTION_CALL_IDS_MAP_KEY, + ] + ) + or ( + isinstance(message.id, str) + and message.id.startswith("msg_") + and (response_id := message.response_metadata.get("id")) + and isinstance(response_id, str) + and response_id.startswith("resp_") + ) + ) + if not is_chatopenai_v03: + return message + + content_order = [ + "reasoning", + "code_interpreter_call", + "mcp_call", + "image_generation_call", + "text", + "refusal", + "function_call", + "computer_call", + "mcp_list_tools", + "mcp_approval_request", + # N. B. 
"web_search_call" and "file_search_call" were not passed back in + # in v0.3 + ] + + # Build a bucket for every known block type + buckets: dict[str, list] = {key: [] for key in content_order} + unknown_blocks = [] + + # Reasoning + if reasoning := message.additional_kwargs.get("reasoning"): + if isinstance(message, AIMessageChunk) and message.chunk_position != "last": + buckets["reasoning"].append({**reasoning, "type": "reasoning"}) + else: + buckets["reasoning"].append(reasoning) + + # Refusal + if refusal := message.additional_kwargs.get("refusal"): + buckets["refusal"].append({"type": "refusal", "refusal": refusal}) + + # Text + for block in message.content: + if isinstance(block, dict) and block.get("type") == "text": + block_copy = block.copy() + if isinstance(message.id, str) and message.id.startswith("msg_"): + block_copy["id"] = message.id + buckets["text"].append(block_copy) + else: + unknown_blocks.append(block) + + # Function calls + function_call_ids = message.additional_kwargs.get(_FUNCTION_CALL_IDS_MAP_KEY) + if ( + isinstance(message, AIMessageChunk) + and len(message.tool_call_chunks) == 1 + and message.chunk_position != "last" + ): + # Isolated chunk + tool_call_chunk = message.tool_call_chunks[0] + function_call = { + "type": "function_call", + "name": tool_call_chunk.get("name"), + "arguments": tool_call_chunk.get("args"), + "call_id": tool_call_chunk.get("id"), + } + if function_call_ids is not None and ( + _id := function_call_ids.get(tool_call_chunk.get("id")) + ): + function_call["id"] = _id + buckets["function_call"].append(function_call) + else: + for tool_call in message.tool_calls: + function_call = { + "type": "function_call", + "name": tool_call["name"], + "arguments": json.dumps(tool_call["args"], ensure_ascii=False), + "call_id": tool_call["id"], + } + if function_call_ids is not None and ( + _id := function_call_ids.get(tool_call["id"]) + ): + function_call["id"] = _id + buckets["function_call"].append(function_call) + + # Tool outputs + tool_outputs = message.additional_kwargs.get("tool_outputs", []) + for block in tool_outputs: + if isinstance(block, dict) and (key := block.get("type")) and key in buckets: + buckets[key].append(block) + else: + unknown_blocks.append(block) + + # Re-assemble the content list in the canonical order + new_content = [] + for key in content_order: + new_content.extend(buckets[key]) + new_content.extend(unknown_blocks) + + new_additional_kwargs = dict(message.additional_kwargs) + new_additional_kwargs.pop("reasoning", None) + new_additional_kwargs.pop("refusal", None) + new_additional_kwargs.pop("tool_outputs", None) + + if "id" in message.response_metadata: + new_id = message.response_metadata["id"] + else: + new_id = message.id + + return message.model_copy( + update={ + "content": new_content, + "additional_kwargs": new_additional_kwargs, + "id": new_id, + }, + deep=False, + ) + + # v1 / Responses def _convert_annotation_to_v1(annotation: dict[str, Any]) -> types.Annotation: annotation_type = annotation.get("type") @@ -419,6 +561,7 @@ def translate_content(message: AIMessage) -> list[types.ContentBlock]: """Derive standard content blocks from a message with OpenAI content.""" if isinstance(message.content, str): return _convert_to_v1_from_chat_completions(message) + message = _convert_from_v03_ai_message(message) return _convert_to_v1_from_responses(message) @@ -426,6 +569,7 @@ def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock] """Derive standard content blocks from a message chunk with OpenAI 
content.""" if isinstance(message.content, str): return _convert_to_v1_from_chat_completions_chunk(message) + message = _convert_from_v03_ai_message(message) # type: ignore[assignment] return _convert_to_v1_from_responses(message) diff --git a/libs/core/tests/unit_tests/messages/block_translators/test_openai.py b/libs/core/tests/unit_tests/messages/block_translators/test_openai.py index 924252dae2b49..873ca517dd627 100644 --- a/libs/core/tests/unit_tests/messages/block_translators/test_openai.py +++ b/libs/core/tests/unit_tests/messages/block_translators/test_openai.py @@ -293,3 +293,152 @@ def test_convert_to_v1_from_openai_input() -> None: ] assert _content_blocks_equal_ignore_id(message.content_blocks, expected) + + +def test_compat_responses_v03() -> None: + # Check compatibility with v0.3 legacy message format + message_v03 = AIMessage( + content=[ + {"type": "text", "text": "Hello, world!", "annotations": [{"type": "foo"}]} + ], + additional_kwargs={ + "reasoning": { + "type": "reasoning", + "id": "rs_123", + "summary": [ + {"type": "summary_text", "text": "summary 1"}, + {"type": "summary_text", "text": "summary 2"}, + ], + }, + "tool_outputs": [ + { + "type": "web_search_call", + "id": "websearch_123", + "status": "completed", + } + ], + "refusal": "I cannot assist with that.", + "__openai_function_call_ids__": {"call_abc": "fc_abc"}, + }, + tool_calls=[ + {"type": "tool_call", "name": "my_tool", "args": {"x": 3}, "id": "call_abc"} + ], + response_metadata={"id": "resp_123", "model_provider": "openai"}, + id="msg_123", + ) + + expected_content: list[types.ContentBlock] = [ + {"type": "reasoning", "id": "rs_123", "reasoning": "summary 1"}, + {"type": "reasoning", "id": "rs_123", "reasoning": "summary 2"}, + { + "type": "text", + "text": "Hello, world!", + "annotations": [ + {"type": "non_standard_annotation", "value": {"type": "foo"}} + ], + "id": "msg_123", + }, + { + "type": "non_standard", + "value": {"type": "refusal", "refusal": "I cannot assist with that."}, + }, + { + "type": "tool_call", + "name": "my_tool", + "args": {"x": 3}, + "id": "call_abc", + "extras": {"item_id": "fc_abc"}, + }, + {"type": "web_search_call", "id": "websearch_123", "status": "completed"}, # type: ignore[list-item] + {"type": "web_search_result", "id": "websearch_123"}, + ] + assert message_v03.content_blocks == expected_content + + # Test chunks + ## Tool calls + chunk_1 = AIMessageChunk( + content=[], + additional_kwargs={"__openai_function_call_ids__": {"call_abc": "fc_abc"}}, + tool_call_chunks=[ + { + "type": "tool_call_chunk", + "name": "my_tool", + "args": "", + "id": "call_abc", + "index": 0, + } + ], + response_metadata={"model_provider": "openai"}, + ) + expected_content = [ + { + "type": "tool_call_chunk", + "name": "my_tool", + "args": "", + "id": "call_abc", + "index": 0, + "extras": {"item_id": "fc_abc"}, + } + ] + assert chunk_1.content_blocks == expected_content + + chunk_2 = AIMessageChunk( + content=[], + additional_kwargs={"__openai_function_call_ids__": {}}, + tool_call_chunks=[ + { + "type": "tool_call_chunk", + "name": None, + "args": "{", + "id": None, + "index": 0, + } + ], + ) + expected_content = [ + {"type": "tool_call_chunk", "name": None, "args": "{", "id": None, "index": 0} + ] + + chunk = chunk_1 + chunk_2 + expected_content = [ + { + "type": "tool_call_chunk", + "name": "my_tool", + "args": "{", + "id": "call_abc", + "index": 0, + "extras": {"item_id": "fc_abc"}, + } + ] + assert chunk.content_blocks == expected_content + + ## Reasoning + chunk_1 = AIMessageChunk( + 
content=[], + additional_kwargs={ + "reasoning": {"id": "rs_abc", "summary": [], "type": "reasoning"} + }, + response_metadata={"model_provider": "openai"}, + ) + expected_content = [{"type": "reasoning", "id": "rs_abc"}] + assert chunk_1.content_blocks == expected_content + + chunk_2 = AIMessageChunk( + content=[], + additional_kwargs={ + "reasoning": { + "summary": [ + {"index": 0, "type": "summary_text", "text": "reasoning text"} + ] + } + }, + response_metadata={"model_provider": "openai"}, + ) + expected_content = [{"type": "reasoning", "reasoning": "reasoning text"}] + assert chunk_2.content_blocks == expected_content + + chunk = chunk_1 + chunk_2 + expected_content = [ + {"type": "reasoning", "reasoning": "reasoning text", "id": "rs_abc"} + ] + assert chunk.content_blocks == expected_content diff --git a/libs/partners/openai/langchain_openai/chat_models/_compat.py b/libs/partners/openai/langchain_openai/chat_models/_compat.py index eb9ce3f40d464..f06496b604f5e 100644 --- a/libs/partners/openai/langchain_openai/chat_models/_compat.py +++ b/libs/partners/openai/langchain_openai/chat_models/_compat.py @@ -147,120 +147,6 @@ def _convert_to_v03_ai_message( return message -def _convert_from_v03_ai_message(message: AIMessage) -> AIMessage: - """Convert an old-style v0.3 AIMessage into the new content-block format.""" - # Only update ChatOpenAI v0.3 AIMessages - # TODO: structure provenance into AIMessage - is_chatopenai_v03 = ( - isinstance(message.content, list) - and all(isinstance(b, dict) for b in message.content) - ) and ( - any( - item in message.additional_kwargs - for item in [ - "reasoning", - "tool_outputs", - "refusal", - _FUNCTION_CALL_IDS_MAP_KEY, - ] - ) - or ( - isinstance(message.id, str) - and message.id.startswith("msg_") - and (response_id := message.response_metadata.get("id")) - and isinstance(response_id, str) - and response_id.startswith("resp_") - ) - ) - if not is_chatopenai_v03: - return message - - content_order = [ - "reasoning", - "code_interpreter_call", - "mcp_call", - "image_generation_call", - "text", - "refusal", - "function_call", - "computer_call", - "mcp_list_tools", - "mcp_approval_request", - # N. B. 
"web_search_call" and "file_search_call" were not passed back in - # in v0.3 - ] - - # Build a bucket for every known block type - buckets: dict[str, list] = {key: [] for key in content_order} - unknown_blocks = [] - - # Reasoning - if reasoning := message.additional_kwargs.get("reasoning"): - buckets["reasoning"].append(reasoning) - - # Refusal - if refusal := message.additional_kwargs.get("refusal"): - buckets["refusal"].append({"type": "refusal", "refusal": refusal}) - - # Text - for block in message.content: - if isinstance(block, dict) and block.get("type") == "text": - block_copy = block.copy() - if isinstance(message.id, str) and message.id.startswith("msg_"): - block_copy["id"] = message.id - buckets["text"].append(block_copy) - else: - unknown_blocks.append(block) - - # Function calls - function_call_ids = message.additional_kwargs.get(_FUNCTION_CALL_IDS_MAP_KEY) - for tool_call in message.tool_calls: - function_call = { - "type": "function_call", - "name": tool_call["name"], - "arguments": json.dumps(tool_call["args"], ensure_ascii=False), - "call_id": tool_call["id"], - } - if function_call_ids is not None and ( - _id := function_call_ids.get(tool_call["id"]) - ): - function_call["id"] = _id - buckets["function_call"].append(function_call) - - # Tool outputs - tool_outputs = message.additional_kwargs.get("tool_outputs", []) - for block in tool_outputs: - if isinstance(block, dict) and (key := block.get("type")) and key in buckets: - buckets[key].append(block) - else: - unknown_blocks.append(block) - - # Re-assemble the content list in the canonical order - new_content = [] - for key in content_order: - new_content.extend(buckets[key]) - new_content.extend(unknown_blocks) - - new_additional_kwargs = dict(message.additional_kwargs) - new_additional_kwargs.pop("reasoning", None) - new_additional_kwargs.pop("refusal", None) - new_additional_kwargs.pop("tool_outputs", None) - - if "id" in message.response_metadata: - new_id = message.response_metadata["id"] - else: - new_id = message.id - - return message.model_copy( - update={ - "content": new_content, - "additional_kwargs": new_additional_kwargs, - "id": new_id, - }, - deep=False, - ) - - # v1 / Chat Completions def _convert_from_v1_to_chat_completions(message: AIMessage) -> AIMessage: """Convert a v1 message to the Chat Completions format.""" diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index bb9e717282a6d..c45dada48ee5f 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -69,6 +69,9 @@ OutputTokenDetails, UsageMetadata, ) +from langchain_core.messages.block_translators.openai import ( + _convert_from_v03_ai_message, +) from langchain_core.messages.tool import tool_call_chunk from langchain_core.output_parsers import JsonOutputParser, PydanticOutputParser from langchain_core.output_parsers.openai_tools import ( @@ -107,7 +110,6 @@ _get_default_httpx_client, ) from langchain_openai.chat_models._compat import ( - _convert_from_v03_ai_message, _convert_from_v1_to_chat_completions, _convert_from_v1_to_responses, _convert_to_v03_ai_message, diff --git a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py index 69196a315fc3e..4bb7ca467eda3 100644 --- a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py +++ b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py @@ -22,6 
+22,9 @@ ) from langchain_core.messages import content as types from langchain_core.messages.ai import UsageMetadata +from langchain_core.messages.block_translators.openai import ( + _convert_from_v03_ai_message, +) from langchain_core.outputs import ChatGeneration, ChatResult from langchain_core.runnables import RunnableLambda from langchain_core.tracers.base import BaseTracer @@ -52,7 +55,6 @@ from langchain_openai import ChatOpenAI from langchain_openai.chat_models._compat import ( _FUNCTION_CALL_IDS_MAP_KEY, - _convert_from_v03_ai_message, _convert_from_v1_to_chat_completions, _convert_from_v1_to_responses, _convert_to_v03_ai_message, From 18d1cf236e9a372b2c6f77145ece5a3a3f0acdee Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Tue, 26 Aug 2025 16:42:58 -0400 Subject: [PATCH 63/73] . --- .../language_models/chat_models.py | 196 ++++++++++-------- .../language_models/fake_chat_models.py | 26 ++- .../unit_tests/fake/test_fake_chat_model.py | 68 +++--- .../language_models/chat_models/test_base.py | 4 + .../chat_models/test_output_version.py | 24 +-- .../chat_models/test_rate_limiting.py | 4 +- .../langchain_openai/chat_models/base.py | 4 +- 7 files changed, 191 insertions(+), 135 deletions(-) diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py index d77bddbbaa14b..ec06c0b42d0b3 100644 --- a/libs/core/langchain_core/language_models/chat_models.py +++ b/libs/core/langchain_core/language_models/chat_models.py @@ -447,6 +447,9 @@ def invoke( output_version if output_version is not None else self.output_version ) kwargs["_output_version"] = effective_output_version or "v0" + kwargs["_output_version_explicit"] = ( + output_version is not None or self.output_version is not None + ) return cast( "AIMessage", @@ -495,6 +498,9 @@ async def ainvoke( output_version if output_version is not None else self.output_version ) kwargs["_output_version"] = effective_output_version or "v0" + kwargs["_output_version_explicit"] = ( + output_version is not None or self.output_version is not None + ) llm_result = await self.agenerate_prompt( [self._convert_input(input)], @@ -573,6 +579,9 @@ def stream( output_version if output_version is not None else self.output_version ) kwargs["_output_version"] = effective_output_version or "v0" + kwargs["_output_version_explicit"] = ( + output_version is not None or self.output_version is not None + ) if not self._should_stream(async_api=False, **{**kwargs, "stream": True}): # model doesn't implement streaming, so use default implementation @@ -630,29 +639,26 @@ def stream( input_messages = _normalize_messages(messages) run_id = "-".join((LC_ID_PREFIX, str(run_manager.run_id))) yielded = False + # Filter out internal parameters before passing to implementation + filtered_kwargs = { + k: v + for k, v in kwargs.items() + if k not in ("_output_version", "_output_version_explicit") + } for chunk in self._stream( input_messages, stop=stop, - output_version=kwargs["_output_version"] or "v0", - **kwargs, + output_version=kwargs["_output_version"], + **filtered_kwargs, ): if chunk.message.id is None: chunk.message.id = run_id - chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk) + response_metadata = _gen_info_and_msg_metadata(chunk) output_version = kwargs["_output_version"] - if ( - isinstance(chunk.message, (AIMessage, AIMessageChunk)) - and ( - not isinstance(chunk.message, AIMessageChunk) - or chunk.message.chunk_position != "last" - or chunk.message.content # Include last chunks with 
content - ) - and output_version - and output_version != "v0" - ): - chunk.message.additional_kwargs["output_version"] = ( - output_version - ) + # Add output_version to response_metadata if it was explicitly set + if kwargs.get("_output_version_explicit", False): + response_metadata["output_version"] = output_version + chunk.message.response_metadata = response_metadata if output_version == "v1": # Overwrite .content with .content_blocks chunk.message = _update_message_content_to_blocks( @@ -734,6 +740,9 @@ async def astream( output_version if output_version is not None else self.output_version ) kwargs["_output_version"] = effective_output_version or "v0" + kwargs["_output_version_explicit"] = ( + output_version is not None or self.output_version is not None + ) if not self._should_stream(async_api=True, **{**kwargs, "stream": True}): # No async or sync stream is implemented, so fall back to ainvoke @@ -793,27 +802,26 @@ async def astream( input_messages = _normalize_messages(messages) run_id = "-".join((LC_ID_PREFIX, str(run_manager.run_id))) yielded = False + # Filter out internal parameters before passing to implementation + filtered_kwargs = { + k: v + for k, v in kwargs.items() + if k not in ("_output_version", "_output_version_explicit") + } async for chunk in self._astream( input_messages, stop=stop, - output_version=kwargs["_output_version"] or "v0", - **kwargs, + output_version=kwargs["_output_version"], + **filtered_kwargs, ): if chunk.message.id is None: chunk.message.id = run_id - chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk) + response_metadata = _gen_info_and_msg_metadata(chunk) output_version = kwargs["_output_version"] - if ( - isinstance(chunk.message, (AIMessage, AIMessageChunk)) - and ( - not isinstance(chunk.message, AIMessageChunk) - or chunk.message.chunk_position != "last" - or chunk.message.content # Include last chunks with content - ) - and output_version - and output_version != "v0" - ): - chunk.message.additional_kwargs["output_version"] = output_version + # Add output_version to response_metadata if it was explicitly set + if kwargs.get("_output_version_explicit", False): + response_metadata["output_version"] = output_version + chunk.message.response_metadata = response_metadata if output_version == "v1": # Overwrite .content with .content_blocks chunk.message = _update_message_content_to_blocks( @@ -1286,6 +1294,7 @@ def _generate_with_cache( self.rate_limiter.acquire(blocking=True) output_version = kwargs.pop("_output_version", self.output_version) + output_version_explicit = kwargs.pop("_output_version_explicit", False) # If stream is not explicitly set, check if implicitly requested by # astream_events() or astream_log(). 
Bail out if _stream not implemented @@ -1299,21 +1308,20 @@ def _generate_with_cache( f"{LC_ID_PREFIX}-{run_manager.run_id}" if run_manager else None ) yielded = False + # Filter out internal parameters before passing to implementation + filtered_kwargs = { + k: v + for k, v in kwargs.items() + if k not in ("_output_version", "_output_version_explicit") + } for chunk in self._stream( - messages, stop=stop, output_version=output_version or "v0", **kwargs + messages, stop=stop, output_version=output_version, **filtered_kwargs ): - chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk) - if ( - isinstance(chunk.message, (AIMessage, AIMessageChunk)) - and ( - not isinstance(chunk.message, AIMessageChunk) - or chunk.message.chunk_position != "last" - or chunk.message.content # Include last chunks with content - ) - and output_version - and output_version != "v0" - ): - chunk.message.additional_kwargs["output_version"] = output_version + response_metadata = _gen_info_and_msg_metadata(chunk) + # Add output_version to response_metadata if it was explicitly set + if output_version_explicit: + response_metadata["output_version"] = output_version + chunk.message.response_metadata = response_metadata if run_manager: if chunk.message.id is None: chunk.message.id = run_id @@ -1346,18 +1354,28 @@ def _generate_with_cache( run_manager.on_llm_new_token("", chunk=chunk) chunks.append(chunk) result = generate_from_stream(iter(chunks)) - elif inspect.signature(self._generate).parameters.get("run_manager"): - result = self._generate( - messages, - stop=stop, - run_manager=run_manager, - output_version=output_version or "v0", - **kwargs, - ) else: - result = self._generate( - messages, stop=stop, output_version=output_version or "v0", **kwargs - ) + # Filter out internal parameters before passing to implementation + filtered_kwargs = { + k: v + for k, v in kwargs.items() + if k not in ("_output_version", "_output_version_explicit") + } + if inspect.signature(self._generate).parameters.get("run_manager"): + result = self._generate( + messages, + stop=stop, + run_manager=run_manager, + output_version=output_version, + **filtered_kwargs, + ) + else: + result = self._generate( + messages, + stop=stop, + output_version=output_version, + **filtered_kwargs, + ) if output_version == "v1": # Overwrite .content with .content_blocks @@ -1370,15 +1388,11 @@ def _generate_with_cache( for idx, generation in enumerate(result.generations): if run_manager and generation.message.id is None: generation.message.id = f"{LC_ID_PREFIX}-{run_manager.run_id}-{idx}" - generation.message.response_metadata = _gen_info_and_msg_metadata( - generation - ) - if ( - isinstance(generation.message, (AIMessage, AIMessageChunk)) - and output_version - and output_version != "v0" - ): - generation.message.additional_kwargs["output_version"] = output_version + response_metadata = _gen_info_and_msg_metadata(generation) + # Add output_version to response_metadata if it was explicitly set + if output_version_explicit: + response_metadata["output_version"] = output_version + generation.message.response_metadata = response_metadata if len(result.generations) == 1 and result.llm_output is not None: result.generations[0].message.response_metadata = { **result.llm_output, @@ -1421,6 +1435,7 @@ async def _agenerate_with_cache( await self.rate_limiter.aacquire(blocking=True) output_version = kwargs.pop("_output_version", self.output_version) + output_version_explicit = kwargs.pop("_output_version_explicit", False) # If stream is not explicitly 
set, check if implicitly requested by # astream_events() or astream_log(). Bail out if _astream not implemented @@ -1434,21 +1449,20 @@ async def _agenerate_with_cache( f"{LC_ID_PREFIX}-{run_manager.run_id}" if run_manager else None ) yielded = False + # Filter out internal parameters before passing to implementation + filtered_kwargs = { + k: v + for k, v in kwargs.items() + if k not in ("_output_version", "_output_version_explicit") + } async for chunk in self._astream( - messages, stop=stop, output_version=output_version or "v0", **kwargs + messages, stop=stop, output_version=output_version, **filtered_kwargs ): - chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk) - if ( - isinstance(chunk.message, (AIMessage, AIMessageChunk)) - and ( - not isinstance(chunk.message, AIMessageChunk) - or chunk.message.chunk_position != "last" - or chunk.message.content # Include last chunks with content - ) - and output_version - and output_version != "v0" - ): - chunk.message.additional_kwargs["output_version"] = output_version + response_metadata = _gen_info_and_msg_metadata(chunk) + # Add output_version to response_metadata if it was explicitly set + if output_version_explicit: + response_metadata["output_version"] = output_version + chunk.message.response_metadata = response_metadata if run_manager: if chunk.message.id is None: chunk.message.id = run_id @@ -1482,16 +1496,28 @@ async def _agenerate_with_cache( chunks.append(chunk) result = generate_from_stream(iter(chunks)) elif inspect.signature(self._agenerate).parameters.get("run_manager"): + # Filter out internal parameters before passing to implementation + filtered_kwargs = { + k: v + for k, v in kwargs.items() + if k not in ("_output_version", "_output_version_explicit") + } result = await self._agenerate( messages, stop=stop, run_manager=run_manager, output_version=output_version, - **kwargs, + **filtered_kwargs, ) else: + # Filter out internal parameters before passing to implementation + filtered_kwargs = { + k: v + for k, v in kwargs.items() + if k not in ("_output_version", "_output_version_explicit") + } result = await self._agenerate( - messages, stop=stop, output_version=output_version, **kwargs + messages, stop=stop, output_version=output_version, **filtered_kwargs ) if output_version == "v1": @@ -1505,15 +1531,11 @@ async def _agenerate_with_cache( for idx, generation in enumerate(result.generations): if run_manager and generation.message.id is None: generation.message.id = f"{LC_ID_PREFIX}-{run_manager.run_id}-{idx}" - generation.message.response_metadata = _gen_info_and_msg_metadata( - generation - ) - if ( - isinstance(generation.message, (AIMessage, AIMessageChunk)) - and output_version - and output_version != "v0" - ): - generation.message.additional_kwargs["output_version"] = output_version + response_metadata = _gen_info_and_msg_metadata(generation) + # Add output_version to response_metadata if it was explicitly set + if output_version_explicit: + response_metadata["output_version"] = output_version + generation.message.response_metadata = response_metadata if len(result.generations) == 1 and result.llm_output is not None: result.generations[0].message.response_metadata = { **result.llm_output, diff --git a/libs/core/langchain_core/language_models/fake_chat_models.py b/libs/core/langchain_core/language_models/fake_chat_models.py index fc072866aeac3..daca70effe76a 100644 --- a/libs/core/langchain_core/language_models/fake_chat_models.py +++ b/libs/core/langchain_core/language_models/fake_chat_models.py @@ 
-247,6 +247,8 @@ def _generate( messages: list[BaseMessage], stop: Optional[list[str]] = None, run_manager: Optional[CallbackManagerForLLMRun] = None, + *, + output_version: str = "v0", **kwargs: Any, ) -> ChatResult: """Top Level call.""" @@ -260,11 +262,17 @@ def _stream( messages: list[BaseMessage], stop: Optional[list[str]] = None, run_manager: Optional[CallbackManagerForLLMRun] = None, + *, + output_version: str = "v0", **kwargs: Any, ) -> Iterator[ChatGenerationChunk]: """Stream the output of the model.""" chat_result = self._generate( - messages, stop=stop, run_manager=run_manager, **kwargs + messages, + stop=stop, + run_manager=run_manager, + output_version=output_version, + **kwargs, ) if not isinstance(chat_result, ChatResult): msg = ( @@ -303,6 +311,18 @@ def _stream( and not message.additional_kwargs ): chunk.message.chunk_position = "last" + + if output_version == "v1": + from langchain_core.language_models._utils import ( + _update_message_content_to_blocks, + ) + + chunk.message = _update_message_content_to_blocks( + chunk.message, "v1" + ) + + chunk.message.response_metadata = {"output_version": output_version} + if run_manager: run_manager.on_llm_new_token(token, chunk=chunk) yield chunk @@ -323,7 +343,6 @@ def _stream( content="", additional_kwargs={ "function_call": {fkey: fvalue_chunk}, - "output_version": "v0", }, ) ) @@ -340,7 +359,6 @@ def _stream( content="", additional_kwargs={ "function_call": {fkey: fvalue}, - "output_version": "v0", }, ) ) @@ -355,7 +373,7 @@ def _stream( message=AIMessageChunk( id=message.id, content="", - additional_kwargs={key: value, "output_version": "v0"}, + additional_kwargs={key: value}, ) ) if run_manager: diff --git a/libs/core/tests/unit_tests/fake/test_fake_chat_model.py b/libs/core/tests/unit_tests/fake/test_fake_chat_model.py index 72242aee2518b..182305e04cc10 100644 --- a/libs/core/tests/unit_tests/fake/test_fake_chat_model.py +++ b/libs/core/tests/unit_tests/fake/test_fake_chat_model.py @@ -28,11 +28,11 @@ def test_generic_fake_chat_model_invoke() -> None: infinite_cycle = cycle([AIMessage(content="hello"), AIMessage(content="goodbye")]) model = GenericFakeChatModel(messages=infinite_cycle) response = model.invoke("meow") - assert response == _any_id_ai_message(content="hello") + assert response == _any_id_ai_message(content="hello", response_metadata={}) response = model.invoke("kitty") - assert response == _any_id_ai_message(content="goodbye") + assert response == _any_id_ai_message(content="goodbye", response_metadata={}) response = model.invoke("meow") - assert response == _any_id_ai_message(content="hello") + assert response == _any_id_ai_message(content="hello", response_metadata={}) async def test_generic_fake_chat_model_ainvoke() -> None: @@ -40,11 +40,11 @@ async def test_generic_fake_chat_model_ainvoke() -> None: infinite_cycle = cycle([AIMessage(content="hello"), AIMessage(content="goodbye")]) model = GenericFakeChatModel(messages=infinite_cycle) response = await model.ainvoke("meow") - assert response == _any_id_ai_message(content="hello") + assert response == _any_id_ai_message(content="hello", response_metadata={}) response = await model.ainvoke("kitty") - assert response == _any_id_ai_message(content="goodbye") + assert response == _any_id_ai_message(content="goodbye", response_metadata={}) response = await model.ainvoke("meow") - assert response == _any_id_ai_message(content="hello") + assert response == _any_id_ai_message(content="hello", response_metadata={}) async def test_generic_fake_chat_model_stream() -> 
None: @@ -57,22 +57,24 @@ async def test_generic_fake_chat_model_stream() -> None: model = GenericFakeChatModel(messages=infinite_cycle) chunks = [chunk async for chunk in model.astream("meow")] assert chunks == [ - _any_id_ai_message_chunk(content="hello"), - _any_id_ai_message_chunk(content=" "), + _any_id_ai_message_chunk(content="hello", response_metadata={}), + _any_id_ai_message_chunk(content=" ", response_metadata={}), _any_id_ai_message_chunk( content="goodbye", chunk_position="last", + response_metadata={}, ), ] assert len({chunk.id for chunk in chunks}) == 1 chunks = list(model.stream("meow")) assert chunks == [ - _any_id_ai_message_chunk(content="hello"), - _any_id_ai_message_chunk(content=" "), + _any_id_ai_message_chunk(content="hello", response_metadata={}), + _any_id_ai_message_chunk(content=" ", response_metadata={}), _any_id_ai_message_chunk( content="goodbye", chunk_position="last", + response_metadata={}, ), ] assert len({chunk.id for chunk in chunks}) == 1 @@ -84,12 +86,14 @@ async def test_generic_fake_chat_model_stream() -> None: chunks = [chunk async for chunk in model.astream("meow")] assert chunks == [ _any_id_ai_message_chunk( - content="", additional_kwargs={"foo": 42, "output_version": "v0"} + content="", additional_kwargs={"foo": 42}, response_metadata={} ), _any_id_ai_message_chunk( - content="", additional_kwargs={"bar": 24, "output_version": "v0"} + content="", additional_kwargs={"bar": 24}, response_metadata={} + ), + _any_id_ai_message_chunk( + content="", chunk_position="last", response_metadata={} ), - _any_id_ai_message_chunk(content="", chunk_position="last"), ] assert len({chunk.id for chunk in chunks}) == 1 @@ -111,31 +115,33 @@ async def test_generic_fake_chat_model_stream() -> None: content="", additional_kwargs={ "function_call": {"name": "move_file"}, - "output_version": "v0", }, + response_metadata={}, ), _any_id_ai_message_chunk( content="", additional_kwargs={ "function_call": {"arguments": '{\n "source_path": "foo"'}, - "output_version": "v0", }, + response_metadata={}, ), _any_id_ai_message_chunk( content="", additional_kwargs={ "function_call": {"arguments": ","}, - "output_version": "v0", }, + response_metadata={}, ), _any_id_ai_message_chunk( content="", additional_kwargs={ "function_call": {"arguments": '\n "destination_path": "bar"\n}'}, - "output_version": "v0", }, + response_metadata={}, + ), + _any_id_ai_message_chunk( + content="", chunk_position="last", response_metadata={} ), - _any_id_ai_message_chunk(content="", chunk_position="last"), ] assert len({chunk.id for chunk in chunks}) == 1 @@ -154,8 +160,8 @@ async def test_generic_fake_chat_model_stream() -> None: "arguments": '{\n "source_path": "foo",\n "' 'destination_path": "bar"\n}', }, - "output_version": "v0", }, + response_metadata={}, id=chunks[0].id, chunk_position="last", ) @@ -170,11 +176,12 @@ async def test_generic_fake_chat_model_astream_log() -> None: ] final = log_patches[-1] assert final.state["streamed_output"] == [ - _any_id_ai_message_chunk(content="hello"), - _any_id_ai_message_chunk(content=" "), + _any_id_ai_message_chunk(content="hello", response_metadata={}), + _any_id_ai_message_chunk(content=" ", response_metadata={}), _any_id_ai_message_chunk( content="goodbye", chunk_position="last", + response_metadata={}, ), ] assert len({chunk.id for chunk in final.state["streamed_output"]}) == 1 @@ -230,11 +237,12 @@ async def on_llm_new_token( ) ] assert results == [ - _any_id_ai_message_chunk(content="hello"), - _any_id_ai_message_chunk(content=" "), + 
_any_id_ai_message_chunk(content="hello", response_metadata={}), + _any_id_ai_message_chunk(content=" ", response_metadata={}), _any_id_ai_message_chunk( content="goodbye", chunk_position="last", + response_metadata={}, ), ] assert tokens == ["hello", " ", "goodbye"] @@ -245,19 +253,21 @@ def test_chat_model_inputs() -> None: fake = ParrotFakeChatModel() assert cast("HumanMessage", fake.invoke("hello")) == _any_id_human_message( - content="hello" + content="hello", response_metadata={} + ) + assert fake.invoke([("ai", "blah")]) == _any_id_ai_message( + content="blah", response_metadata={} ) - assert fake.invoke([("ai", "blah")]) == _any_id_ai_message(content="blah") assert fake.invoke([AIMessage(content="blah")]) == _any_id_ai_message( - content="blah" + content="blah", response_metadata={} ) def test_fake_list_chat_model_batch() -> None: expected = [ - _any_id_ai_message(content="a"), - _any_id_ai_message(content="b"), - _any_id_ai_message(content="c"), + _any_id_ai_message(content="a", response_metadata={}), + _any_id_ai_message(content="b", response_metadata={}), + _any_id_ai_message(content="c", response_metadata={}), ] for _ in range(20): # run this 20 times to test race condition in batch diff --git a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py index bf6bfe526105c..8dd8a4634a704 100644 --- a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py +++ b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py @@ -367,6 +367,8 @@ def _generate( messages: list[BaseMessage], stop: Optional[list[str]] = None, run_manager: Optional[CallbackManagerForLLMRun] = None, + *, + output_version: str = "v0", **kwargs: Any, ) -> ChatResult: return ChatResult(generations=[ChatGeneration(message=AIMessage("invoke"))]) @@ -383,6 +385,8 @@ def _stream( messages: list[BaseMessage], stop: Optional[list[str]] = None, run_manager: Optional[CallbackManagerForLLMRun] = None, + *, + output_version: str = "v0", **kwargs: Any, ) -> Iterator[ChatGenerationChunk]: yield ChatGenerationChunk(message=AIMessageChunk(content="stream")) diff --git a/libs/core/tests/unit_tests/language_models/chat_models/test_output_version.py b/libs/core/tests/unit_tests/language_models/chat_models/test_output_version.py index daa4c2e822093..fcea6e3ba561a 100644 --- a/libs/core/tests/unit_tests/language_models/chat_models/test_output_version.py +++ b/libs/core/tests/unit_tests/language_models/chat_models/test_output_version.py @@ -227,24 +227,24 @@ def _llm_type(self) -> str: class TestOutputVersionInMessages: - """Test output_version is added to message additional_kwargs.""" + """Test output_version is added to message response_metadata.""" - def test_output_version_added_to_message_additional_kwargs( + def test_output_version_added_to_message_response_metadata( self, messages: list[BaseMessage], ) -> None: - """Test that output_version is added to message additional_kwargs.""" + """Test that output_version is added to message response_metadata.""" model = OutputVersionTrackingChatModel( messages=iter(["test response"]), output_version="v1" ) result = model.invoke(messages, output_version="v2") - assert result.additional_kwargs["output_version"] == "v2" + assert result.response_metadata["output_version"] == "v2" - def test_output_version_added_to_stream_message_additional_kwargs( + def test_output_version_added_to_stream_message_response_metadata( self, messages: list[BaseMessage], ) -> None: - """Test that 
output_version is added to streamed message additional_kwargs.""" + """Test that output_version is added to streamed message response_metadata.""" model = OutputVersionTrackingChatModel( messages=iter(["test response"]), output_version="v1" ) @@ -255,14 +255,14 @@ def test_output_version_added_to_stream_message_additional_kwargs( assert len(content_chunks) >= 1 # Should have at least one content chunk for chunk in content_chunks: - assert "output_version" in chunk.additional_kwargs - assert chunk.additional_kwargs["output_version"] == "v2" + assert "output_version" in chunk.response_metadata + assert chunk.response_metadata["output_version"] == "v2" - async def test_output_version_added_to_astream_message_additional_kwargs( + async def test_output_version_added_to_astream_message_response_metadata( self, messages: list[BaseMessage], ) -> None: - """Test output_version added to async streamed additional_kwargs.""" + """Test output_version added to async streamed response_metadata.""" model = OutputVersionTrackingChatModel( messages=iter(["test response"]), output_version="v1" ) @@ -273,8 +273,8 @@ async def test_output_version_added_to_astream_message_additional_kwargs( assert len(content_chunks) >= 1 # Should have at least one content chunk for chunk in content_chunks: - assert "output_version" in chunk.additional_kwargs - assert chunk.additional_kwargs["output_version"] == "v2" + assert "output_version" in chunk.response_metadata + assert chunk.response_metadata["output_version"] == "v2" class TestOutputVersionMerging: diff --git a/libs/core/tests/unit_tests/language_models/chat_models/test_rate_limiting.py b/libs/core/tests/unit_tests/language_models/chat_models/test_rate_limiting.py index bf8cb1ab3ddf0..7997e0c097aaf 100644 --- a/libs/core/tests/unit_tests/language_models/chat_models/test_rate_limiting.py +++ b/libs/core/tests/unit_tests/language_models/chat_models/test_rate_limiting.py @@ -216,8 +216,8 @@ def test_rate_limit_skips_cache() -> None: '[{"lc": 1, "type": "constructor", "id": ["langchain", "schema", ' '"messages", "HumanMessage"], "kwargs": {"content": "foo", ' '"type": "human"}}]', - "[('_output_version', 'v0'), ('_type', 'generic-fake-chat-model'), " - "('stop', None)]", + "[('_output_version', 'v0'), ('_output_version_explicit', False), ('_type'," + " 'generic-fake-chat-model'), ('stop', None)]", ) ] diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index bb9e717282a6d..dc101bdf7db22 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -1245,7 +1245,9 @@ def _get_request_payload( if stop is not None: kwargs["stop"] = stop - payload = {**self._default_params, **kwargs} + # Filter out parameters that shouldn't be passed to OpenAI API + filtered_kwargs = {k: v for k, v in kwargs.items() if k != "output_version"} + payload = {**self._default_params, **filtered_kwargs} if self._use_responses_api(payload): if self.use_previous_response_id: From e49156e63fa4ba1e16f7a750a123f1e751409b9b Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Tue, 26 Aug 2025 16:54:04 -0400 Subject: [PATCH 64/73] chore: rfc to use `.text` instead of `.text()` (#32699) --- docs/docs/how_to/multimodal_inputs.ipynb | 34 ++++++------- docs/docs/how_to/multimodal_prompts.ipynb | 8 +-- docs/docs/integrations/chat/anthropic.ipynb | 4 +- docs/docs/integrations/chat/bedrock.ipynb | 6 +-- docs/docs/integrations/chat/litellm.ipynb | 4 +- 
docs/docs/integrations/chat/openai.ipynb | 32 ++++++------ docs/docs/tutorials/agents.ipynb | 25 ++++------ .../integration_template/chat_models.py | 2 +- libs/core/langchain_core/messages/utils.py | 2 +- libs/core/tests/unit_tests/test_messages.py | 49 +++++++++---------- .../langchain/chains/documents/map_reduce.py | 8 +-- .../langchain/chains/documents/stuff.py | 8 +-- .../langchain_anthropic/chat_models.py | 4 +- .../langchain_deepseek/chat_models.py | 2 +- .../groq/langchain_groq/chat_models.py | 2 +- .../ollama/langchain_ollama/chat_models.py | 2 +- .../langchain_openai/chat_models/azure.py | 42 ++++++++-------- .../langchain_openai/chat_models/base.py | 8 +-- .../chat_models/test_base.py | 8 +-- .../chat_models/test_responses_api.py | 22 ++++----- .../partners/xai/langchain_xai/chat_models.py | 2 +- .../integration_tests/chat_models.py | 14 +++--- 22 files changed, 140 insertions(+), 148 deletions(-) diff --git a/docs/docs/how_to/multimodal_inputs.ipynb b/docs/docs/how_to/multimodal_inputs.ipynb index 9c3b98d6d5279..f52309ae638b2 100644 --- a/docs/docs/how_to/multimodal_inputs.ipynb +++ b/docs/docs/how_to/multimodal_inputs.ipynb @@ -58,7 +58,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "1fcf7b27-1cc3-420a-b920-0420b5892e20", "metadata": {}, "outputs": [ @@ -102,7 +102,7 @@ " ],\n", "}\n", "response = llm.invoke([message])\n", - "print(response.text())" + "print(response.text)" ] }, { @@ -133,7 +133,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "99d27f8f-ae78-48bc-9bf2-3cef35213ec7", "metadata": {}, "outputs": [ @@ -163,7 +163,7 @@ " ],\n", "}\n", "response = llm.invoke([message])\n", - "print(response.text())" + "print(response.text)" ] }, { @@ -176,7 +176,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "325fb4ca", "metadata": {}, "outputs": [ @@ -198,7 +198,7 @@ " ],\n", "}\n", "response = llm.invoke([message])\n", - "print(response.text())" + "print(response.text)" ] }, { @@ -234,7 +234,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "6c1455a9-699a-4702-a7e0-7f6eaec76a21", "metadata": {}, "outputs": [ @@ -284,7 +284,7 @@ " ],\n", "}\n", "response = llm.invoke([message])\n", - "print(response.text())" + "print(response.text)" ] }, { @@ -312,7 +312,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "55e1d937-3b22-4deb-b9f0-9e688f0609dc", "metadata": {}, "outputs": [ @@ -342,7 +342,7 @@ " ],\n", "}\n", "response = llm.invoke([message])\n", - "print(response.text())" + "print(response.text)" ] }, { @@ -417,7 +417,7 @@ " ],\n", "}\n", "response = llm.invoke([message])\n", - "print(response.text())" + "print(response.text)" ] }, { @@ -443,7 +443,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "83593b9d-a8d3-4c99-9dac-64e0a9d397cb", "metadata": {}, "outputs": [ @@ -488,13 +488,13 @@ " ],\n", "}\n", "response = llm.invoke([message])\n", - "print(response.text())\n", + "print(response.text)\n", "response.usage_metadata" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "9bbf578e-794a-4dc0-a469-78c876ccd4a3", "metadata": {}, "outputs": [ @@ -530,7 +530,7 @@ " ],\n", "}\n", "response = llm.invoke([message, response, next_message])\n", - "print(response.text())\n", + "print(response.text)\n", "response.usage_metadata" ] }, @@ -600,7 +600,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, 
"id": "ae076c9b-ff8f-461d-9349-250f396c9a25", "metadata": {}, "outputs": [ @@ -641,7 +641,7 @@ " ],\n", "}\n", "response = llm.invoke([message])\n", - "print(response.text())" + "print(response.text)" ] }, { diff --git a/docs/docs/how_to/multimodal_prompts.ipynb b/docs/docs/how_to/multimodal_prompts.ipynb index 09757321ba03f..d38690b29ad9e 100644 --- a/docs/docs/how_to/multimodal_prompts.ipynb +++ b/docs/docs/how_to/multimodal_prompts.ipynb @@ -54,7 +54,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "5df2e558-321d-4cf7-994e-2815ac37e704", "metadata": {}, "outputs": [ @@ -75,7 +75,7 @@ "\n", "chain = prompt | llm\n", "response = chain.invoke({\"image_url\": url})\n", - "print(response.text())" + "print(response.text)" ] }, { @@ -117,7 +117,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "25e4829e-0073-49a8-9669-9f43e5778383", "metadata": {}, "outputs": [ @@ -144,7 +144,7 @@ " \"cache_type\": \"ephemeral\",\n", " }\n", ")\n", - "print(response.text())" + "print(response.text)" ] }, { diff --git a/docs/docs/integrations/chat/anthropic.ipynb b/docs/docs/integrations/chat/anthropic.ipynb index 380f671284a6f..12d0710c67974 100644 --- a/docs/docs/integrations/chat/anthropic.ipynb +++ b/docs/docs/integrations/chat/anthropic.ipynb @@ -1457,7 +1457,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "30a0af36-2327-4b1d-9ba5-e47cb72db0be", "metadata": {}, "outputs": [ @@ -1493,7 +1493,7 @@ "response = llm_with_tools.invoke(\n", " \"There's a syntax error in my primes.py file. Can you help me fix it?\"\n", ")\n", - "print(response.text())\n", + "print(response.text)\n", "response.tool_calls" ] }, diff --git a/docs/docs/integrations/chat/bedrock.ipynb b/docs/docs/integrations/chat/bedrock.ipynb index a246357a25ed2..06b1223f28285 100644 --- a/docs/docs/integrations/chat/bedrock.ipynb +++ b/docs/docs/integrations/chat/bedrock.ipynb @@ -243,12 +243,12 @@ "id": "0ef05abb-9c04-4dc3-995e-f857779644d5", "metadata": {}, "source": [ - "You can filter to text using the [.text()](https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.AIMessage.html#langchain_core.messages.ai.AIMessage.text) method on the output:" + "You can filter to text using the [.text](https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.AIMessage.html#langchain_core.messages.ai.AIMessage.text) property on the output:" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "2a4e743f-ea7d-4e5a-9b12-f9992362de8b", "metadata": {}, "outputs": [ @@ -262,7 +262,7 @@ ], "source": [ "for chunk in llm.stream(messages):\n", - " print(chunk.text(), end=\"|\")" + " print(chunk.text, end=\"|\")" ] }, { diff --git a/docs/docs/integrations/chat/litellm.ipynb b/docs/docs/integrations/chat/litellm.ipynb index 2028189dcaa7c..a697e1d5b32a6 100644 --- a/docs/docs/integrations/chat/litellm.ipynb +++ b/docs/docs/integrations/chat/litellm.ipynb @@ -261,7 +261,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "c5fac0e9-05a4-4fc1-a3b3-e5bbb24b971b", "metadata": { "colab": { @@ -286,7 +286,7 @@ ], "source": [ "async for token in llm.astream(\"Hello, please explain how antibiotics work\"):\n", - " print(token.text(), end=\"\")" + " print(token.text, end=\"\")" ] }, { diff --git a/docs/docs/integrations/chat/openai.ipynb b/docs/docs/integrations/chat/openai.ipynb index 54db230ba9523..d72ffcaf77379 100644 --- 
a/docs/docs/integrations/chat/openai.ipynb +++ b/docs/docs/integrations/chat/openai.ipynb @@ -814,7 +814,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "1f758726-33ef-4c04-8a54-49adb783bbb3", "metadata": {}, "outputs": [ @@ -860,7 +860,7 @@ "llm_with_tools = llm.bind_tools([tool])\n", "\n", "response = llm_with_tools.invoke(\"What is deep research by OpenAI?\")\n", - "print(response.text())" + "print(response.text)" ] }, { @@ -1151,7 +1151,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "073f6010-6b0e-4db6-b2d3-7427c8dec95b", "metadata": {}, "outputs": [ @@ -1167,7 +1167,7 @@ } ], "source": [ - "response_2.text()" + "response_2.text" ] }, { @@ -1198,7 +1198,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "b6da5bd6-a44a-4c64-970b-30da26b003d6", "metadata": {}, "outputs": [ @@ -1214,7 +1214,7 @@ } ], "source": [ - "response_2.text()" + "response_2.text" ] }, { @@ -1404,7 +1404,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "51d3e4d3-ea78-426c-9205-aecb0937fca7", "metadata": {}, "outputs": [ @@ -1428,13 +1428,13 @@ "messages = [{\"role\": \"user\", \"content\": first_query}]\n", "\n", "response = llm_with_tools.invoke(messages)\n", - "response_text = response.text()\n", + "response_text = response.text\n", "print(f\"{response_text[:100]}... {response_text[-100:]}\")" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "b248bedf-2050-4c17-a90e-3a26eeb1b055", "metadata": {}, "outputs": [ @@ -1460,7 +1460,7 @@ " ]\n", ")\n", "second_response = llm_with_tools.invoke(messages)\n", - "print(second_response.text())" + "print(second_response.text)" ] }, { @@ -1482,7 +1482,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "009e541a-b372-410e-b9dd-608a8052ce09", "metadata": {}, "outputs": [ @@ -1502,12 +1502,12 @@ " output_version=\"responses/v1\",\n", ")\n", "response = llm.invoke(\"Hi, I'm Bob.\")\n", - "print(response.text())" + "print(response.text)" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "393a443a-4c5f-4a07-bc0e-c76e529b35e3", "metadata": {}, "outputs": [ @@ -1524,7 +1524,7 @@ " \"What is my name?\",\n", " previous_response_id=response.response_metadata[\"id\"],\n", ")\n", - "print(second_response.text())" + "print(second_response.text)" ] }, { @@ -1589,7 +1589,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "8d322f3a-0732-45ab-ac95-dfd4596e0d85", "metadata": {}, "outputs": [ @@ -1616,7 +1616,7 @@ "response = llm.invoke(\"What is 3^3?\")\n", "\n", "# Output\n", - "response.text()" + "response.text" ] }, { diff --git a/docs/docs/tutorials/agents.ipynb b/docs/docs/tutorials/agents.ipynb index 69c9140e6dd5a..91adfa9e900a1 100644 --- a/docs/docs/tutorials/agents.ipynb +++ b/docs/docs/tutorials/agents.ipynb @@ -302,7 +302,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "c96c960b", "metadata": {}, "outputs": [ @@ -320,7 +320,7 @@ "source": [ "query = \"Hi!\"\n", "response = model.invoke([{\"role\": \"user\", \"content\": query}])\n", - "response.text()" + "response.text" ] }, { @@ -351,7 +351,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "b6a7e925", "metadata": {}, "outputs": [ @@ -371,7 +371,7 @@ "query = \"Hi!\"\n", "response = model_with_tools.invoke([{\"role\": \"user\", \"content\": query}])\n", "\n", - 
"print(f\"Message content: {response.text()}\\n\")\n", + "print(f\"Message content: {response.text}\\n\")\n", "print(f\"Tool calls: {response.tool_calls}\")" ] }, @@ -385,7 +385,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "688b465d", "metadata": {}, "outputs": [ @@ -403,7 +403,7 @@ "query = \"Search for the weather in SF\"\n", "response = model_with_tools.invoke([{\"role\": \"user\", \"content\": query}])\n", "\n", - "print(f\"Message content: {response.text()}\\n\")\n", + "print(f\"Message content: {response.text}\\n\")\n", "print(f\"Tool calls: {response.tool_calls}\")" ] }, @@ -615,19 +615,12 @@ "## Streaming tokens\n", "\n", "In addition to streaming back messages, it is also useful to stream back tokens.\n", - "We can do this by specifying `stream_mode=\"messages\"`.\n", - "\n", - "\n", - "::: note\n", - "\n", - "Below we use `message.text()`, which requires `langchain-core>=0.3.37`.\n", - "\n", - ":::" + "We can do this by specifying `stream_mode=\"messages\"`." ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "id": "63198158-380e-43a3-a2ad-d4288949c1d4", "metadata": {}, "outputs": [ @@ -651,7 +644,7 @@ "for step, metadata in agent_executor.stream(\n", " {\"messages\": [input_message]}, stream_mode=\"messages\"\n", "):\n", - " if metadata[\"langgraph_node\"] == \"agent\" and (text := step.text()):\n", + " if metadata[\"langgraph_node\"] == \"agent\" and (text := step.text):\n", " print(text, end=\"|\")" ] }, diff --git a/libs/cli/langchain_cli/integration_template/integration_template/chat_models.py b/libs/cli/langchain_cli/integration_template/integration_template/chat_models.py index 9703b50358a64..fe16108ea165e 100644 --- a/libs/cli/langchain_cli/integration_template/integration_template/chat_models.py +++ b/libs/cli/langchain_cli/integration_template/integration_template/chat_models.py @@ -86,7 +86,7 @@ class Chat__ModuleName__(BaseChatModel): .. code-block:: python for chunk in llm.stream(messages): - print(chunk.text(), end="") + print(chunk.text, end="") .. 
code-block:: python diff --git a/libs/core/langchain_core/messages/utils.py b/libs/core/langchain_core/messages/utils.py index 282f05a89fbe0..df26f9b53c512 100644 --- a/libs/core/langchain_core/messages/utils.py +++ b/libs/core/langchain_core/messages/utils.py @@ -133,7 +133,7 @@ def get_buffer_string( else: msg = f"Got unsupported message type: {m}" raise ValueError(msg) # noqa: TRY004 - message = f"{role}: {m.text()}" + message = f"{role}: {m.text}" if isinstance(m, AIMessage) and "function_call" in m.additional_kwargs: message += f"{m.additional_kwargs['function_call']}" string_messages.append(message) diff --git a/libs/core/tests/unit_tests/test_messages.py b/libs/core/tests/unit_tests/test_messages.py index d00e8af3496ac..d04bf511dc89f 100644 --- a/libs/core/tests/unit_tests/test_messages.py +++ b/libs/core/tests/unit_tests/test_messages.py @@ -1066,9 +1066,9 @@ def test_message_text() -> None: # content: [empty], [single element], [multiple elements] # content dict types: [text], [not text], [no type] - assert HumanMessage(content="foo").text() == "foo" - assert AIMessage(content=[]).text() == "" - assert AIMessage(content=["foo", "bar"]).text() == "foobar" + assert HumanMessage(content="foo").text == "foo" + assert AIMessage(content=[]).text == "" + assert AIMessage(content=["foo", "bar"]).text == "foobar" assert ( AIMessage( content=[ @@ -1080,12 +1080,11 @@ def test_message_text() -> None: "input": {"location": "San Francisco, CA"}, }, ] - ).text() + ).text == "thinking..." ) assert ( - SystemMessage(content=[{"type": "text", "text": "foo"}, "bar"]).text() - == "foobar" + SystemMessage(content=[{"type": "text", "text": "foo"}, "bar"]).text == "foobar" ) assert ( ToolMessage( @@ -1101,18 +1100,18 @@ def test_message_text() -> None: }, ], tool_call_id="1", - ).text() + ).text == "15 degrees" ) assert ( - AIMessage(content=[{"text": "hi there"}, "hi"]).text() == "hi" + AIMessage(content=[{"text": "hi there"}, "hi"]).text == "hi" ) # missing type: text - assert AIMessage(content=[{"type": "nottext", "text": "hi"}]).text() == "" - assert AIMessage(content=[]).text() == "" + assert AIMessage(content=[{"type": "nottext", "text": "hi"}]).text == "" + assert AIMessage(content=[]).text == "" assert ( AIMessage( content="", tool_calls=[create_tool_call(name="a", args={"b": 1}, id=None)] - ).text() + ).text == "" ) @@ -1312,30 +1311,30 @@ def test_text_accessor() -> None: """Test that `message.text` property and `.text()` method return the same value.""" human_msg = HumanMessage(content="Hello world") assert human_msg.text == "Hello world" - assert human_msg.text() == "Hello world" - assert str(human_msg.text) == str(human_msg.text()) + assert human_msg.text == "Hello world" + assert str(human_msg.text) == str(human_msg.text) system_msg = SystemMessage(content="You are a helpful assistant") assert system_msg.text == "You are a helpful assistant" - assert system_msg.text() == "You are a helpful assistant" - assert str(system_msg.text) == str(system_msg.text()) + assert system_msg.text == "You are a helpful assistant" + assert str(system_msg.text) == str(system_msg.text) ai_msg = AIMessage(content="I can help you with that") assert ai_msg.text == "I can help you with that" - assert ai_msg.text() == "I can help you with that" - assert str(ai_msg.text) == str(ai_msg.text()) + assert ai_msg.text == "I can help you with that" + assert str(ai_msg.text) == str(ai_msg.text) tool_msg = ToolMessage(content="Task completed", tool_call_id="tool_1") assert tool_msg.text == "Task completed" - assert 
tool_msg.text() == "Task completed" - assert str(tool_msg.text) == str(tool_msg.text()) + assert tool_msg.text == "Task completed" + assert str(tool_msg.text) == str(tool_msg.text) complex_msg = HumanMessage( content=[{"type": "text", "text": "Hello "}, {"type": "text", "text": "world"}] ) assert complex_msg.text == "Hello world" - assert complex_msg.text() == "Hello world" - assert str(complex_msg.text) == str(complex_msg.text()) + assert complex_msg.text == "Hello world" + assert str(complex_msg.text) == str(complex_msg.text) mixed_msg = AIMessage( content=[ @@ -1345,10 +1344,10 @@ def test_text_accessor() -> None: ] ) assert mixed_msg.text == "The answer is 42" - assert mixed_msg.text() == "The answer is 42" - assert str(mixed_msg.text) == str(mixed_msg.text()) + assert mixed_msg.text == "The answer is 42" + assert str(mixed_msg.text) == str(mixed_msg.text) empty_msg = HumanMessage(content=[]) assert empty_msg.text == "" - assert empty_msg.text() == "" - assert str(empty_msg.text) == str(empty_msg.text()) + assert empty_msg.text == "" + assert str(empty_msg.text) == str(empty_msg.text) diff --git a/libs/langchain_v1/langchain/chains/documents/map_reduce.py b/libs/langchain_v1/langchain/chains/documents/map_reduce.py index 648bedc1ade8b..5e9eb7be0a27c 100644 --- a/libs/langchain_v1/langchain/chains/documents/map_reduce.py +++ b/libs/langchain_v1/langchain/chains/documents/map_reduce.py @@ -329,7 +329,7 @@ def _map_node( ) -> dict[str, list[ExtractionResult]]: prompt = self._get_map_prompt(state, runtime) response = cast("AIMessage", self.model.invoke(prompt, config=config)) - result = response if self.response_format else response.text() + result = response if self.response_format else response.text extraction_result: ExtractionResult = { "indexes": state["indexes"], "result": result, @@ -345,7 +345,7 @@ async def _amap_node( response = cast( "AIMessage", await self.model.ainvoke(prompt, config=config) ) - result = response if self.response_format else response.text() + result = response if self.response_format else response.text extraction_result: ExtractionResult = { "indexes": state["indexes"], "result": result, @@ -366,7 +366,7 @@ def _reduce_node( ) -> MapReduceNodeUpdate: prompt = self._get_reduce_prompt(state, runtime) response = cast("AIMessage", self.model.invoke(prompt, config=config)) - result = response if self.response_format else response.text() + result = response if self.response_format else response.text return {"result": result} async def _areduce_node( @@ -378,7 +378,7 @@ async def _areduce_node( response = cast( "AIMessage", await self.model.ainvoke(prompt, config=config) ) - result = response if self.response_format else response.text() + result = response if self.response_format else response.text return {"result": result} return RunnableCallable( diff --git a/libs/langchain_v1/langchain/chains/documents/stuff.py b/libs/langchain_v1/langchain/chains/documents/stuff.py index 194e85f02bc1c..444301a9f2235 100644 --- a/libs/langchain_v1/langchain/chains/documents/stuff.py +++ b/libs/langchain_v1/langchain/chains/documents/stuff.py @@ -319,12 +319,12 @@ def _process_node( # Initial processing prompt = self._get_initial_prompt(state, runtime) response = cast("AIMessage", self.model.invoke(prompt, config=config)) - result = response if self.response_format else response.text() + result = response if self.response_format else response.text return {"result": result} # Refinement prompt = self._get_refine_prompt(state, runtime) response = cast("AIMessage", 
self.model.invoke(prompt, config=config)) - result = response if self.response_format else response.text() + result = response if self.response_format else response.text return {"result": result} async def _aprocess_node( @@ -343,14 +343,14 @@ async def _aprocess_node( response = cast( "AIMessage", await self.model.ainvoke(prompt, config=config) ) - result = response if self.response_format else response.text() + result = response if self.response_format else response.text return {"result": result} # Refinement prompt = await self._aget_refine_prompt(state, runtime) response = cast( "AIMessage", await self.model.ainvoke(prompt, config=config) ) - result = response if self.response_format else response.text() + result = response if self.response_format else response.text return {"result": result} return RunnableCallable( diff --git a/libs/partners/anthropic/langchain_anthropic/chat_models.py b/libs/partners/anthropic/langchain_anthropic/chat_models.py index 9dd6b6a00b215..953f1cf6a3d43 100644 --- a/libs/partners/anthropic/langchain_anthropic/chat_models.py +++ b/libs/partners/anthropic/langchain_anthropic/chat_models.py @@ -655,7 +655,7 @@ class ChatAnthropic(BaseChatModel): .. code-block:: python for chunk in llm.stream(messages): - print(chunk.text(), end="") + print(chunk.text, end="") .. code-block:: python @@ -1252,7 +1252,7 @@ def get_weather(location: str) -> str: response = llm_with_tools.invoke( "There's a syntax error in my primes.py file. Can you help me fix it?" ) - print(response.text()) + print(response.text) response.tool_calls .. code-block:: none diff --git a/libs/partners/deepseek/langchain_deepseek/chat_models.py b/libs/partners/deepseek/langchain_deepseek/chat_models.py index 68c5b8c6b440e..7d0a01f971b15 100644 --- a/libs/partners/deepseek/langchain_deepseek/chat_models.py +++ b/libs/partners/deepseek/langchain_deepseek/chat_models.py @@ -84,7 +84,7 @@ class ChatDeepSeek(BaseChatOpenAI): .. code-block:: python for chunk in llm.stream(messages): - print(chunk.text(), end="") + print(chunk.text, end="") .. code-block:: python diff --git a/libs/partners/groq/langchain_groq/chat_models.py b/libs/partners/groq/langchain_groq/chat_models.py index 6b1a7bbe08447..17b141df55895 100644 --- a/libs/partners/groq/langchain_groq/chat_models.py +++ b/libs/partners/groq/langchain_groq/chat_models.py @@ -160,7 +160,7 @@ class ChatGroq(BaseChatModel): # Streaming `text` for each content chunk received for chunk in llm.stream(messages): - print(chunk.text(), end="") + print(chunk.text, end="") .. code-block:: python diff --git a/libs/partners/ollama/langchain_ollama/chat_models.py b/libs/partners/ollama/langchain_ollama/chat_models.py index d0ed1cd5b6eb5..0301b55e5aaea 100644 --- a/libs/partners/ollama/langchain_ollama/chat_models.py +++ b/libs/partners/ollama/langchain_ollama/chat_models.py @@ -286,7 +286,7 @@ class ChatOllama(BaseChatModel): .. code-block:: python for chunk in llm.stream("Return the words Hello World!"): - print(chunk.text(), end="") + print(chunk.text, end="") .. code-block:: python diff --git a/libs/partners/openai/langchain_openai/chat_models/azure.py b/libs/partners/openai/langchain_openai/chat_models/azure.py index ab67b8917d548..9fb26599fe07d 100644 --- a/libs/partners/openai/langchain_openai/chat_models/azure.py +++ b/libs/partners/openai/langchain_openai/chat_models/azure.py @@ -181,7 +181,7 @@ class AzureChatOpenAI(BaseChatOpenAI): .. code-block:: python for chunk in llm.stream(messages): - print(chunk.text(), end="") + print(chunk.text, end="") .. 
code-block:: python
@@ -479,10 +479,10 @@ class Joke(BaseModel):
        Example: ``https://example-resource.azure.openai.com/``
    """
    deployment_name: Union[str, None] = Field(default=None, alias="azure_deployment")
-    """A model deployment. 
-    
+    """A model deployment.
+
        If given sets the base client URL to include ``/deployments/{azure_deployment}``
-    
+
    .. note::
        This means you won't be able to use non-deployment endpoints.
    """
@@ -512,27 +512,27 @@ class Joke(BaseModel):
    """
    azure_ad_token_provider: Union[Callable[[], str], None] = None
    """A function that returns an Azure Active Directory token.
-    
+
        Will be invoked on every sync request. For async requests,
        will be invoked if ``azure_ad_async_token_provider`` is not provided.
    """
    azure_ad_async_token_provider: Union[Callable[[], Awaitable[str]], None] = None
    """A function that returns an Azure Active Directory token.
-    
+
        Will be invoked on every async request.
    """
    model_version: str = ""
    """The version of the model (e.g. ``'0125'`` for ``'gpt-3.5-0125'``).
-    Azure OpenAI doesn't return model version with the response by default so it must 
+    Azure OpenAI doesn't return model version with the response by default so it must
    be manually specified if you want to use this information downstream, e.g. when
    calculating costs.
-    When you specify the version, it will be appended to the model name in the 
-    response. Setting correct version will help you to calculate the cost properly. 
-    Model version is not validated, so make sure you set it correctly to get the 
+    When you specify the version, it will be appended to the model name in the
+    response. Setting correct version will help you to calculate the cost properly.
+    Model version is not validated, so make sure you set it correctly to get the
    correct cost.
    """
@@ -547,34 +547,34 @@ class Joke(BaseModel):
    """
    model_name: Optional[str] = Field(default=None, alias="model")  # type: ignore[assignment]
-    """Name of the deployed OpenAI model, e.g. ``'gpt-4o'``, ``'gpt-35-turbo'``, etc. 
-    
+    """Name of the deployed OpenAI model, e.g. ``'gpt-4o'``, ``'gpt-35-turbo'``, etc.
+
    Distinct from the Azure deployment name, which is set by the Azure user.
    Used for tracing and token counting.
-    
+
    .. warning::
        Does NOT affect completion.
    """
    disabled_params: Optional[dict[str, Any]] = Field(default=None)
-    """Parameters of the OpenAI client or chat.completions endpoint that should be 
+    """Parameters of the OpenAI client or chat.completions endpoint that should be
    disabled for the given model.
-    Should be specified as ``{"param": None | ['val1', 'val2']}`` where the key is the 
+    Should be specified as ``{"param": None | ['val1', 'val2']}`` where the key is the
    parameter and the value is either None, meaning that parameter should never be
    used, or it's a list of disabled values for the parameter.
    For example, older models may not support the ``'parallel_tool_calls'`` parameter at
-    all, in which case ``disabled_params={"parallel_tool_calls": None}`` can be passed 
+    all, in which case ``disabled_params={"parallel_tool_calls": None}`` can be passed
    in.
-    
+
    If a parameter is disabled then it will not be used by default in any methods, e.g.
-    in 
+    in
    :meth:`~langchain_openai.chat_models.azure.AzureChatOpenAI.with_structured_output`.
    However this does not prevent a user from directly passing in the parameter during
-    invocation. 
-    
-    By default, unless ``model_name="gpt-4o"`` is specified, then 
+    invocation.
+
+    By default, unless ``model_name="gpt-4o"`` is specified, then
    ``'parallel_tool_calls'`` will be disabled. 
""" diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index c45dada48ee5f..0caa2cf015b23 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -2169,7 +2169,7 @@ class ChatOpenAI(BaseChatOpenAI): # type: ignore[override] .. code-block:: python for chunk in llm.stream(messages): - print(chunk.text(), end="") + print(chunk.text, end="") .. code-block:: python @@ -2399,7 +2399,7 @@ class GetPopulation(BaseModel): output_version="responses/v1", ) response = llm.invoke("Hi, I'm Bob.") - response.text() + response.text .. code-block:: python @@ -2411,7 +2411,7 @@ class GetPopulation(BaseModel): "What is my name?", previous_response_id=response.response_metadata["id"], ) - second_response.text() + second_response.text .. code-block:: python @@ -2460,7 +2460,7 @@ class GetPopulation(BaseModel): response = llm.invoke("What is 3^3?") # Response text - print(f"Output: {response.text()}") + print(f"Output: {response.text}") # Reasoning summaries for block in response.content: diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_base.py b/libs/partners/openai/tests/integration_tests/chat_models/test_base.py index 7f7c7a32ed535..56a4c79b8400e 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_base.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_base.py @@ -72,7 +72,7 @@ def test_chat_openai_system_message(use_responses_api: bool) -> None: human_message = HumanMessage(content="Hello") response = chat.invoke([system_message, human_message]) assert isinstance(response, BaseMessage) - assert isinstance(response.text(), str) + assert isinstance(response.text, str) @pytest.mark.scheduled @@ -179,7 +179,7 @@ async def test_openai_abatch_tags(use_responses_api: bool) -> None: ["I'm Pickle Rick", "I'm not Pickle Rick"], config={"tags": ["foo"]} ) for token in result: - assert isinstance(token.text(), str) + assert isinstance(token.text, str) @pytest.mark.flaky(retries=3, delay=1) @@ -1002,8 +1002,8 @@ def test_o1(use_max_completion_tokens: bool, use_responses_api: bool) -> None: ] ) assert isinstance(response, AIMessage) - assert isinstance(response.text(), str) - assert response.text().upper() == response.text() + assert isinstance(response.text, str) + assert response.text.upper() == response.text @pytest.mark.scheduled diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py index bb58e66d29b94..911e65cdd71ca 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py @@ -46,7 +46,7 @@ def _check_response(response: Optional[BaseMessage]) -> None: if "url" in annotation: assert "start_index" in annotation assert "end_index" in annotation - text_content = response.text() # type: ignore[operator,misc] + text_content = response.text # type: ignore[operator,misc] assert isinstance(text_content, str) assert text_content assert response.usage_metadata @@ -190,7 +190,7 @@ def test_parsed_pydantic_schema( model=MODEL_NAME, use_responses_api=True, output_version=output_version ) response = llm.invoke("how are ya", response_format=Foo) - parsed = Foo(**json.loads(response.text())) + parsed = Foo(**json.loads(response.text)) assert parsed == 
response.additional_kwargs["parsed"] assert parsed.response @@ -200,7 +200,7 @@ def test_parsed_pydantic_schema( assert isinstance(chunk, AIMessageChunk) full = chunk if full is None else full + chunk assert isinstance(full, AIMessageChunk) - parsed = Foo(**json.loads(full.text())) + parsed = Foo(**json.loads(full.text)) assert parsed == full.additional_kwargs["parsed"] assert parsed.response @@ -208,7 +208,7 @@ def test_parsed_pydantic_schema( async def test_parsed_pydantic_schema_async() -> None: llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) response = await llm.ainvoke("how are ya", response_format=Foo) - parsed = Foo(**json.loads(response.text())) + parsed = Foo(**json.loads(response.text)) assert parsed == response.additional_kwargs["parsed"] assert parsed.response @@ -218,7 +218,7 @@ async def test_parsed_pydantic_schema_async() -> None: assert isinstance(chunk, AIMessageChunk) full = chunk if full is None else full + chunk assert isinstance(full, AIMessageChunk) - parsed = Foo(**json.loads(full.text())) + parsed = Foo(**json.loads(full.text)) assert parsed == full.additional_kwargs["parsed"] assert parsed.response @@ -228,7 +228,7 @@ async def test_parsed_pydantic_schema_async() -> None: def test_parsed_dict_schema(schema: Any) -> None: llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) response = llm.invoke("how are ya", response_format=schema) - parsed = json.loads(response.text()) + parsed = json.loads(response.text) assert parsed == response.additional_kwargs["parsed"] assert parsed["response"] and isinstance(parsed["response"], str) @@ -238,7 +238,7 @@ def test_parsed_dict_schema(schema: Any) -> None: assert isinstance(chunk, AIMessageChunk) full = chunk if full is None else full + chunk assert isinstance(full, AIMessageChunk) - parsed = json.loads(full.text()) + parsed = json.loads(full.text) assert parsed == full.additional_kwargs["parsed"] assert parsed["response"] and isinstance(parsed["response"], str) @@ -252,7 +252,7 @@ class InvalidJoke(TypedDict): # Test not strict response = llm.invoke("Tell me a joke", response_format=InvalidJoke) - parsed = json.loads(response.text()) + parsed = json.loads(response.text) assert parsed == response.additional_kwargs["parsed"] # Test strict @@ -273,7 +273,7 @@ class InvalidJoke(TypedDict): async def test_parsed_dict_schema_async(schema: Any) -> None: llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) response = await llm.ainvoke("how are ya", response_format=schema) - parsed = json.loads(response.text()) + parsed = json.loads(response.text) assert parsed == response.additional_kwargs["parsed"] assert parsed["response"] and isinstance(parsed["response"], str) @@ -283,7 +283,7 @@ async def test_parsed_dict_schema_async(schema: Any) -> None: assert isinstance(chunk, AIMessageChunk) full = chunk if full is None else full + chunk assert isinstance(full, AIMessageChunk) - parsed = json.loads(full.text()) + parsed = json.loads(full.text) assert parsed == full.additional_kwargs["parsed"] assert parsed["response"] and isinstance(parsed["response"], str) @@ -297,7 +297,7 @@ def multiply(x: int, y: int) -> int: bound_llm = llm.bind_tools([multiply], response_format=Foo, strict=True) # Test structured output response = llm.invoke("how are ya", response_format=Foo) - parsed = Foo(**json.loads(response.text())) + parsed = Foo(**json.loads(response.text)) assert parsed == response.additional_kwargs["parsed"] assert parsed.response diff --git a/libs/partners/xai/langchain_xai/chat_models.py 
b/libs/partners/xai/langchain_xai/chat_models.py index 9137294543b6a..4787ef831af2d 100644 --- a/libs/partners/xai/langchain_xai/chat_models.py +++ b/libs/partners/xai/langchain_xai/chat_models.py @@ -105,7 +105,7 @@ class ChatXAI(BaseChatOpenAI): # type: ignore[override] .. code-block:: python for chunk in llm.stream(messages): - print(chunk.text(), end="") + print(chunk.text, end="") .. code-block:: python diff --git a/libs/standard-tests/langchain_tests/integration_tests/chat_models.py b/libs/standard-tests/langchain_tests/integration_tests/chat_models.py index ff213e47b7b40..368eb410f81a3 100644 --- a/libs/standard-tests/langchain_tests/integration_tests/chat_models.py +++ b/libs/standard-tests/langchain_tests/integration_tests/chat_models.py @@ -703,7 +703,7 @@ def test_invoke(self, model: BaseChatModel) -> None: result = model.invoke("Hello") assert result is not None assert isinstance(result, AIMessage) - assert isinstance(result.text(), str) + assert isinstance(result.text, str) assert len(result.content) > 0 async def test_ainvoke(self, model: BaseChatModel) -> None: @@ -736,7 +736,7 @@ async def test_ainvoke(self, model: BaseChatModel) -> None: result = await model.ainvoke("Hello") assert result is not None assert isinstance(result, AIMessage) - assert isinstance(result.text(), str) + assert isinstance(result.text, str) assert len(result.content) > 0 def test_stream(self, model: BaseChatModel) -> None: @@ -835,7 +835,7 @@ def test_batch(self, model: BaseChatModel) -> None: for result in batch_results: assert result is not None assert isinstance(result, AIMessage) - assert isinstance(result.text(), str) + assert isinstance(result.text, str) assert len(result.content) > 0 async def test_abatch(self, model: BaseChatModel) -> None: @@ -865,7 +865,7 @@ async def test_abatch(self, model: BaseChatModel) -> None: for result in batch_results: assert result is not None assert isinstance(result, AIMessage) - assert isinstance(result.text(), str) + assert isinstance(result.text, str) assert len(result.content) > 0 def test_conversation(self, model: BaseChatModel) -> None: @@ -896,7 +896,7 @@ def test_conversation(self, model: BaseChatModel) -> None: result = model.invoke(messages) assert result is not None assert isinstance(result, AIMessage) - assert isinstance(result.text(), str) + assert isinstance(result.text, str) assert len(result.content) > 0 def test_double_messages_conversation(self, model: BaseChatModel) -> None: @@ -934,7 +934,7 @@ def test_double_messages_conversation(self, model: BaseChatModel) -> None: result = model.invoke(messages) assert result is not None assert isinstance(result, AIMessage) - assert isinstance(result.text(), str) + assert isinstance(result.text, str) assert len(result.content) > 0 def test_usage_metadata(self, model: BaseChatModel) -> None: @@ -2849,7 +2849,7 @@ def test_message_with_name(self, model: BaseChatModel) -> None: result = model.invoke([HumanMessage("hello", name="example_user")]) assert result is not None assert isinstance(result, AIMessage) - assert isinstance(result.text(), str) + assert isinstance(result.text, str) assert len(result.content) > 0 def test_agent_loop(self, model: BaseChatModel) -> None: From 2d450d4d3bf46cbfccbce8a3338f20e75837891b Mon Sep 17 00:00:00 2001 From: ccurme Date: Tue, 26 Aug 2025 19:03:47 -0300 Subject: [PATCH 65/73] fix(core): (v1) finish test (#32701) --- .../language_models/chat_models/test_base.py | 30 ++++++++++++------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git 
a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py index 1176d683f1130..f3dc6d7a52ded 100644 --- a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py +++ b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py @@ -695,7 +695,7 @@ def test_extend_support_to_openai_multimodal_formats() -> None: "type": "input_audio", "input_audio": { "format": "wav", - "data": "data:audio/wav;base64,", + "data": "", }, }, { # file-base64 @@ -715,23 +715,22 @@ def test_extend_support_to_openai_multimodal_formats() -> None: expected_content_messages = HumanMessage( content=[ {"type": "text", "text": "Hello"}, # TextContentBlock - { # Chat Completions Image becomes ImageContentBlock after invoke - "type": "image", - "url": "https://example.com/image.png", + { # image-url passes through + "type": "image_url", + "image_url": {"url": "https://example.com/image.png"}, }, - { # ... - "type": "image", - "base64": "data:image/jpeg;base64,/9j/4AAQSkZJRg...", - "mime_type": "image/jpeg", + { # image-url passes through with inline data + "type": "image_url", + "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQSkZJRg..."}, }, { # AudioContentBlock "type": "audio", - "base64": "data:audio/wav;base64,", + "base64": "", "mime_type": "audio/wav", }, { # FileContentBlock "type": "file", - "base64": "data:application/pdf;base64,", + "base64": "", "mime_type": "application/pdf", "extras": {"filename": "draconomicon.pdf"}, }, @@ -742,6 +741,17 @@ def test_extend_support_to_openai_multimodal_formats() -> None: ] ) + normalized_content = _normalize_messages([messages]) + + # Check structure, ignoring auto-generated IDs + assert len(normalized_content) == 1 + normalized_message = normalized_content[0] + assert len(normalized_message.content) == len(expected_content_messages.content) + + assert _content_blocks_equal_ignore_id( + normalized_message.content, expected_content_messages.content + ) + def test_normalize_messages_edge_cases() -> None: # Test behavior of malformed/unrecognized content blocks From 8a14148336c946d435f16568c6623ffc8e83a608 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Tue, 26 Aug 2025 19:55:32 -0400 Subject: [PATCH 66/73] . 
--- .../language_models/fake_chat_models.py | 41 ++++++++++++++++++- libs/core/langchain_core/utils/_merge.py | 1 - .../unit_tests/fake/test_fake_chat_model.py | 8 +++- .../unit_tests/runnables/test_runnable.py | 12 +----- .../runnables/test_runnable_events_v1.py | 4 +- .../runnables/test_runnable_events_v2.py | 4 +- 6 files changed, 49 insertions(+), 21 deletions(-) diff --git a/libs/core/langchain_core/language_models/fake_chat_models.py b/libs/core/langchain_core/language_models/fake_chat_models.py index daca70effe76a..bffcf1f6cd955 100644 --- a/libs/core/langchain_core/language_models/fake_chat_models.py +++ b/libs/core/langchain_core/language_models/fake_chat_models.py @@ -254,6 +254,32 @@ def _generate( """Top Level call.""" message = next(self.messages) message_ = AIMessage(content=message) if isinstance(message, str) else message + + # Apply v1 content transformation if needed + if output_version == "v1": + from langchain_core.language_models._utils import ( + _update_message_content_to_blocks, + ) + + message_ = _update_message_content_to_blocks(message_, "v1") + + # Only set response metadata if output_version is explicitly provided + # If output_version is "v0" and self.output_version is None, it's the default + output_version_explicit = not ( + output_version == "v0" and getattr(self, "output_version", None) is None + ) + if output_version_explicit: + if hasattr(message_, "response_metadata"): + message_.response_metadata = {"output_version": output_version} + else: + # Create new message with response_metadata + message_ = AIMessage( + content=message_.content, + additional_kwargs=message_.additional_kwargs, + response_metadata={"output_version": output_version}, + id=message_.id, + ) + generation = ChatGeneration(message=message_) return ChatResult(generations=[generation]) @@ -267,11 +293,12 @@ def _stream( **kwargs: Any, ) -> Iterator[ChatGenerationChunk]: """Stream the output of the model.""" + # Always call _generate with v0 to get the original string content for splitting chat_result = self._generate( messages, stop=stop, run_manager=run_manager, - output_version=output_version, + output_version="v0", **kwargs, ) if not isinstance(chat_result, ChatResult): @@ -321,7 +348,17 @@ def _stream( chunk.message, "v1" ) - chunk.message.response_metadata = {"output_version": output_version} + # Only set response metadata if output_version is explicitly provided + # If output_version is "v0" and self.output_version is None, it's the + # default + output_version_explicit = not ( + output_version == "v0" + and getattr(self, "output_version", None) is None + ) + if output_version_explicit: + chunk.message.response_metadata = {"output_version": output_version} + else: + chunk.message.response_metadata = {} if run_manager: run_manager.on_llm_new_token(token, chunk=chunk) diff --git a/libs/core/langchain_core/utils/_merge.py b/libs/core/langchain_core/utils/_merge.py index db6b5665ade39..93b99d24b0992 100644 --- a/libs/core/langchain_core/utils/_merge.py +++ b/libs/core/langchain_core/utils/_merge.py @@ -44,7 +44,6 @@ def merge_dicts(left: dict[str, Any], *others: dict[str, Any]) -> dict[str, Any] ) raise TypeError(msg) elif isinstance(merged[right_k], str): - # Special handling for output_version - it should be consistent if right_k == "output_version": if merged[right_k] == right_v: continue diff --git a/libs/core/tests/unit_tests/fake/test_fake_chat_model.py b/libs/core/tests/unit_tests/fake/test_fake_chat_model.py index 182305e04cc10..4c45237698023 100644 --- 
a/libs/core/tests/unit_tests/fake/test_fake_chat_model.py +++ b/libs/core/tests/unit_tests/fake/test_fake_chat_model.py @@ -92,7 +92,9 @@ async def test_generic_fake_chat_model_stream() -> None: content="", additional_kwargs={"bar": 24}, response_metadata={} ), _any_id_ai_message_chunk( - content="", chunk_position="last", response_metadata={} + content="", + chunk_position="last", + response_metadata={}, ), ] assert len({chunk.id for chunk in chunks}) == 1 @@ -140,7 +142,9 @@ async def test_generic_fake_chat_model_stream() -> None: response_metadata={}, ), _any_id_ai_message_chunk( - content="", chunk_position="last", response_metadata={} + content="", + chunk_position="last", + response_metadata={}, ), ] assert len({chunk.id for chunk in chunks}) == 1 diff --git a/libs/core/tests/unit_tests/runnables/test_runnable.py b/libs/core/tests/unit_tests/runnables/test_runnable.py index f55efc2bb6fda..65b6ea5d4f9bd 100644 --- a/libs/core/tests/unit_tests/runnables/test_runnable.py +++ b/libs/core/tests/unit_tests/runnables/test_runnable.py @@ -1862,11 +1862,7 @@ def test_prompt_with_chat_model( ] == [ _any_id_ai_message_chunk(content="f"), _any_id_ai_message_chunk(content="o"), - _any_id_ai_message_chunk( - content="o", - chunk_position="last", - additional_kwargs={}, - ), + _any_id_ai_message_chunk(content="o", chunk_position="last"), ] assert prompt_spy.call_args.args[1] == {"question": "What is your name?"} assert chat_spy.call_args.args[1] == ChatPromptValue( @@ -1975,11 +1971,7 @@ async def test_prompt_with_chat_model_async( ] == [ _any_id_ai_message_chunk(content="f"), _any_id_ai_message_chunk(content="o"), - _any_id_ai_message_chunk( - content="o", - chunk_position="last", - additional_kwargs={}, - ), + _any_id_ai_message_chunk(content="o", chunk_position="last"), ] assert prompt_spy.call_args.args[1] == {"question": "What is your name?"} assert chat_spy.call_args.args[1] == ChatPromptValue( diff --git a/libs/core/tests/unit_tests/runnables/test_runnable_events_v1.py b/libs/core/tests/unit_tests/runnables/test_runnable_events_v1.py index e19819685bcdf..2dc16821f2b53 100644 --- a/libs/core/tests/unit_tests/runnables/test_runnable_events_v1.py +++ b/libs/core/tests/unit_tests/runnables/test_runnable_events_v1.py @@ -1887,9 +1887,7 @@ async def test_events_astream_config() -> None: ) model_02 = model.with_config({"configurable": {"messages": good_world_on_repeat}}) - assert model_02.invoke("hello") == AIMessage( - content="Goodbye world", additional_kwargs={}, id="ai2" - ) + assert model_02.invoke("hello") == AIMessage(content="Goodbye world", id="ai2") events = await _collect_events(model_02.astream_events("hello", version="v1")) _assert_events_equal_allow_superset_metadata( diff --git a/libs/core/tests/unit_tests/runnables/test_runnable_events_v2.py b/libs/core/tests/unit_tests/runnables/test_runnable_events_v2.py index de65d9ff905e2..536da3665ed9e 100644 --- a/libs/core/tests/unit_tests/runnables/test_runnable_events_v2.py +++ b/libs/core/tests/unit_tests/runnables/test_runnable_events_v2.py @@ -1839,9 +1839,7 @@ async def test_events_astream_config() -> None: ) model_02 = model.with_config({"configurable": {"messages": good_world_on_repeat}}) - assert model_02.invoke("hello") == AIMessage( - content="Goodbye world", additional_kwargs={}, id="ai2" - ) + assert model_02.invoke("hello") == AIMessage(content="Goodbye world", id="ai2") events = await _collect_events(model_02.astream_events("hello", version="v2")) _assert_events_equal_allow_superset_metadata( From 
32941d6ec5fadb9444f268295f05f1c60f975417 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Tue, 26 Aug 2025 20:25:09 -0400 Subject: [PATCH 67/73] . --- .../language_models/chat_models.py | 71 ++++++------ .../language_models/fake_chat_models.py | 23 ++-- .../unit_tests/fake/test_fake_chat_model.py | 102 ++++++------------ .../langchain_openai/chat_models/base.py | 7 +- 4 files changed, 79 insertions(+), 124 deletions(-) diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py index ec06c0b42d0b3..83472227278e6 100644 --- a/libs/core/langchain_core/language_models/chat_models.py +++ b/libs/core/langchain_core/language_models/chat_models.py @@ -49,6 +49,10 @@ is_data_content_block, message_chunk_to_message, ) +from langchain_core.output_parsers.openai_tools import ( + JsonOutputKeyToolsParser, + PydanticToolsParser, +) from langchain_core.outputs import ( ChatGeneration, ChatGenerationChunk, @@ -430,11 +434,11 @@ def invoke( """Invoke the model. Args: - input: The model input. - config: The config to use for this model run. - stop: Stop words to use during generation. + input: The model input. See ``LanguageModelInput`` for valid options. + config: The ``RunnableConfig`` to use for this model run. + stop: Stop word(s) to use during generation. output_version: Override the model's ``output_version`` for this invocation. - If None, uses the model's configured ``output_version``. + If None, uses the called model's configured ``output_version``. **kwargs: Additional keyword arguments. Returns: @@ -447,6 +451,8 @@ def invoke( output_version if output_version is not None else self.output_version ) kwargs["_output_version"] = effective_output_version or "v0" + + # Whether the user explicitly set an output_version for either model or call kwargs["_output_version_explicit"] = ( output_version is not None or self.output_version is not None ) @@ -481,11 +487,11 @@ async def ainvoke( """Asynchronously invoke the model. Args: - input: The model input. - config: The config to use for this model run. - stop: Stop words to use during generation. + input: The model input. See ``LanguageModelInput`` for valid options. + config: The ``RunnableConfig`` to use for this model run. + stop: Stop word(s) to use during generation. output_version: Override the model's ``output_version`` for this invocation. - If None, uses the model's configured ``output_version``. + If None, uses the called model's configured ``output_version``. **kwargs: Additional keyword arguments. Returns: @@ -498,6 +504,8 @@ async def ainvoke( output_version if output_version is not None else self.output_version ) kwargs["_output_version"] = effective_output_version or "v0" + + # Whether the user explicitly set an output_version for either model or call kwargs["_output_version_explicit"] = ( output_version is not None or self.output_version is not None ) @@ -564,11 +572,11 @@ def stream( """Stream responses from the chat model. Args: - input: The model input. - config: The config to use for this model run. - stop: Stop words to use during generation. + input: The model input. See ``LanguageModelInput`` for valid options. + config: The ``RunnableConfig`` to use for this model run. + stop: Stop word(s) to use during generation. output_version: Override the model's ``output_version`` for this invocation. - If None, uses the model's configured ``output_version``. + If None, uses the called model's configured ``output_version``. **kwargs: Additional keyword arguments. 
Returns: @@ -579,6 +587,8 @@ def stream( output_version if output_version is not None else self.output_version ) kwargs["_output_version"] = effective_output_version or "v0" + + # Whether the user explicitly set an output_version for either model or call kwargs["_output_version_explicit"] = ( output_version is not None or self.output_version is not None ) @@ -639,7 +649,7 @@ def stream( input_messages = _normalize_messages(messages) run_id = "-".join((LC_ID_PREFIX, str(run_manager.run_id))) yielded = False - # Filter out internal parameters before passing to implementation + filtered_kwargs = { k: v for k, v in kwargs.items() @@ -655,7 +665,7 @@ def stream( chunk.message.id = run_id response_metadata = _gen_info_and_msg_metadata(chunk) output_version = kwargs["_output_version"] - # Add output_version to response_metadata if it was explicitly set + # Add output_version to response_metadata only if was explicitly set if kwargs.get("_output_version_explicit", False): response_metadata["output_version"] = output_version chunk.message.response_metadata = response_metadata @@ -725,11 +735,11 @@ async def astream( """Asynchronously stream responses from the model. Args: - input: The model input. - config: The config to use for this model run. - stop: Stop words to use during generation. + input: The model input. See ``LanguageModelInput`` for valid options. + config: The ``RunnableConfig`` to use for this model run. + stop: Stop word(s) to use during generation. output_version: Override the model's ``output_version`` for this invocation. - If None, uses the model's configured ``output_version``. + If None, uses the called model's configured ``output_version``. **kwargs: Additional keyword arguments. Returns: @@ -740,6 +750,8 @@ async def astream( output_version if output_version is not None else self.output_version ) kwargs["_output_version"] = effective_output_version or "v0" + + # Whether the user explicitly set an output_version for either model or call kwargs["_output_version_explicit"] = ( output_version is not None or self.output_version is not None ) @@ -802,7 +814,7 @@ async def astream( input_messages = _normalize_messages(messages) run_id = "-".join((LC_ID_PREFIX, str(run_manager.run_id))) yielded = False - # Filter out internal parameters before passing to implementation + filtered_kwargs = { k: v for k, v in kwargs.items() @@ -818,7 +830,7 @@ async def astream( chunk.message.id = run_id response_metadata = _gen_info_and_msg_metadata(chunk) output_version = kwargs["_output_version"] - # Add output_version to response_metadata if it was explicitly set + # Add output_version to response_metadata only if was explicitly set if kwargs.get("_output_version_explicit", False): response_metadata["output_version"] = output_version chunk.message.response_metadata = response_metadata @@ -1308,7 +1320,7 @@ def _generate_with_cache( f"{LC_ID_PREFIX}-{run_manager.run_id}" if run_manager else None ) yielded = False - # Filter out internal parameters before passing to implementation + filtered_kwargs = { k: v for k, v in kwargs.items() @@ -1318,7 +1330,7 @@ def _generate_with_cache( messages, stop=stop, output_version=output_version, **filtered_kwargs ): response_metadata = _gen_info_and_msg_metadata(chunk) - # Add output_version to response_metadata if it was explicitly set + # Add output_version to response_metadata only if it was explicitly set if output_version_explicit: response_metadata["output_version"] = output_version chunk.message.response_metadata = response_metadata @@ -1355,7 +1367,6 @@ 
def _generate_with_cache( chunks.append(chunk) result = generate_from_stream(iter(chunks)) else: - # Filter out internal parameters before passing to implementation filtered_kwargs = { k: v for k, v in kwargs.items() @@ -1389,7 +1400,7 @@ def _generate_with_cache( if run_manager and generation.message.id is None: generation.message.id = f"{LC_ID_PREFIX}-{run_manager.run_id}-{idx}" response_metadata = _gen_info_and_msg_metadata(generation) - # Add output_version to response_metadata if it was explicitly set + # Add output_version to response_metadata only if it was explicitly set if output_version_explicit: response_metadata["output_version"] = output_version generation.message.response_metadata = response_metadata @@ -1449,7 +1460,7 @@ async def _agenerate_with_cache( f"{LC_ID_PREFIX}-{run_manager.run_id}" if run_manager else None ) yielded = False - # Filter out internal parameters before passing to implementation + filtered_kwargs = { k: v for k, v in kwargs.items() @@ -1459,7 +1470,7 @@ async def _agenerate_with_cache( messages, stop=stop, output_version=output_version, **filtered_kwargs ): response_metadata = _gen_info_and_msg_metadata(chunk) - # Add output_version to response_metadata if it was explicitly set + # Add output_version to response_metadata only if it was explicitly set if output_version_explicit: response_metadata["output_version"] = output_version chunk.message.response_metadata = response_metadata @@ -1496,7 +1507,6 @@ async def _agenerate_with_cache( chunks.append(chunk) result = generate_from_stream(iter(chunks)) elif inspect.signature(self._agenerate).parameters.get("run_manager"): - # Filter out internal parameters before passing to implementation filtered_kwargs = { k: v for k, v in kwargs.items() @@ -1532,7 +1542,7 @@ async def _agenerate_with_cache( if run_manager and generation.message.id is None: generation.message.id = f"{LC_ID_PREFIX}-{run_manager.run_id}-{idx}" response_metadata = _gen_info_and_msg_metadata(generation) - # Add output_version to response_metadata if it was explicitly set + # Add output_version to response_metadata only if it was explicitly set if output_version_explicit: response_metadata["output_version"] = output_version generation.message.response_metadata = response_metadata @@ -1899,11 +1909,6 @@ class AnswerWithJustification(BaseModel): msg = f"Received unsupported arguments {kwargs}" raise ValueError(msg) - from langchain_core.output_parsers.openai_tools import ( - JsonOutputKeyToolsParser, - PydanticToolsParser, - ) - if type(self).bind_tools is BaseChatModel.bind_tools: msg = "with_structured_output is not implemented for this model." 
raise NotImplementedError(msg) diff --git a/libs/core/langchain_core/language_models/fake_chat_models.py b/libs/core/langchain_core/language_models/fake_chat_models.py index bffcf1f6cd955..7943f0c78ad69 100644 --- a/libs/core/langchain_core/language_models/fake_chat_models.py +++ b/libs/core/langchain_core/language_models/fake_chat_models.py @@ -12,6 +12,9 @@ AsyncCallbackManagerForLLMRun, CallbackManagerForLLMRun, ) +from langchain_core.language_models._utils import ( + _update_message_content_to_blocks, +) from langchain_core.language_models.chat_models import BaseChatModel, SimpleChatModel from langchain_core.messages import AIMessage, AIMessageChunk, BaseMessage from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult @@ -255,16 +258,11 @@ def _generate( message = next(self.messages) message_ = AIMessage(content=message) if isinstance(message, str) else message - # Apply v1 content transformation if needed if output_version == "v1": - from langchain_core.language_models._utils import ( - _update_message_content_to_blocks, - ) - message_ = _update_message_content_to_blocks(message_, "v1") - # Only set response metadata if output_version is explicitly provided - # If output_version is "v0" and self.output_version is None, it's the default + # Only set in response metadata if output_version is explicitly provided + # (If output_version is "v0" and self.output_version is None, it's the default) output_version_explicit = not ( output_version == "v0" and getattr(self, "output_version", None) is None ) @@ -272,7 +270,6 @@ def _generate( if hasattr(message_, "response_metadata"): message_.response_metadata = {"output_version": output_version} else: - # Create new message with response_metadata message_ = AIMessage( content=message_.content, additional_kwargs=message_.additional_kwargs, @@ -293,12 +290,11 @@ def _stream( **kwargs: Any, ) -> Iterator[ChatGenerationChunk]: """Stream the output of the model.""" - # Always call _generate with v0 to get the original string content for splitting chat_result = self._generate( messages, stop=stop, run_manager=run_manager, - output_version="v0", + output_version="v0", # Always call with v0 to get original string content **kwargs, ) if not isinstance(chat_result, ChatResult): @@ -340,17 +336,10 @@ def _stream( chunk.message.chunk_position = "last" if output_version == "v1": - from langchain_core.language_models._utils import ( - _update_message_content_to_blocks, - ) - chunk.message = _update_message_content_to_blocks( chunk.message, "v1" ) - # Only set response metadata if output_version is explicitly provided - # If output_version is "v0" and self.output_version is None, it's the - # default output_version_explicit = not ( output_version == "v0" and getattr(self, "output_version", None) is None diff --git a/libs/core/tests/unit_tests/fake/test_fake_chat_model.py b/libs/core/tests/unit_tests/fake/test_fake_chat_model.py index 4c45237698023..0e1944534d07d 100644 --- a/libs/core/tests/unit_tests/fake/test_fake_chat_model.py +++ b/libs/core/tests/unit_tests/fake/test_fake_chat_model.py @@ -28,11 +28,11 @@ def test_generic_fake_chat_model_invoke() -> None: infinite_cycle = cycle([AIMessage(content="hello"), AIMessage(content="goodbye")]) model = GenericFakeChatModel(messages=infinite_cycle) response = model.invoke("meow") - assert response == _any_id_ai_message(content="hello", response_metadata={}) + assert response == _any_id_ai_message(content="hello") response = model.invoke("kitty") - assert response == 
_any_id_ai_message(content="goodbye", response_metadata={}) + assert response == _any_id_ai_message(content="goodbye") response = model.invoke("meow") - assert response == _any_id_ai_message(content="hello", response_metadata={}) + assert response == _any_id_ai_message(content="hello") async def test_generic_fake_chat_model_ainvoke() -> None: @@ -40,11 +40,11 @@ async def test_generic_fake_chat_model_ainvoke() -> None: infinite_cycle = cycle([AIMessage(content="hello"), AIMessage(content="goodbye")]) model = GenericFakeChatModel(messages=infinite_cycle) response = await model.ainvoke("meow") - assert response == _any_id_ai_message(content="hello", response_metadata={}) + assert response == _any_id_ai_message(content="hello") response = await model.ainvoke("kitty") - assert response == _any_id_ai_message(content="goodbye", response_metadata={}) + assert response == _any_id_ai_message(content="goodbye") response = await model.ainvoke("meow") - assert response == _any_id_ai_message(content="hello", response_metadata={}) + assert response == _any_id_ai_message(content="hello") async def test_generic_fake_chat_model_stream() -> None: @@ -57,25 +57,17 @@ async def test_generic_fake_chat_model_stream() -> None: model = GenericFakeChatModel(messages=infinite_cycle) chunks = [chunk async for chunk in model.astream("meow")] assert chunks == [ - _any_id_ai_message_chunk(content="hello", response_metadata={}), - _any_id_ai_message_chunk(content=" ", response_metadata={}), - _any_id_ai_message_chunk( - content="goodbye", - chunk_position="last", - response_metadata={}, - ), + _any_id_ai_message_chunk(content="hello"), + _any_id_ai_message_chunk(content=" "), + _any_id_ai_message_chunk(content="goodbye", chunk_position="last"), ] assert len({chunk.id for chunk in chunks}) == 1 chunks = list(model.stream("meow")) assert chunks == [ - _any_id_ai_message_chunk(content="hello", response_metadata={}), - _any_id_ai_message_chunk(content=" ", response_metadata={}), - _any_id_ai_message_chunk( - content="goodbye", - chunk_position="last", - response_metadata={}, - ), + _any_id_ai_message_chunk(content="hello"), + _any_id_ai_message_chunk(content=" "), + _any_id_ai_message_chunk(content="goodbye", chunk_position="last"), ] assert len({chunk.id for chunk in chunks}) == 1 @@ -85,17 +77,9 @@ async def test_generic_fake_chat_model_stream() -> None: model = GenericFakeChatModel(messages=cycle([message])) chunks = [chunk async for chunk in model.astream("meow")] assert chunks == [ - _any_id_ai_message_chunk( - content="", additional_kwargs={"foo": 42}, response_metadata={} - ), - _any_id_ai_message_chunk( - content="", additional_kwargs={"bar": 24}, response_metadata={} - ), - _any_id_ai_message_chunk( - content="", - chunk_position="last", - response_metadata={}, - ), + _any_id_ai_message_chunk(content="", additional_kwargs={"foo": 42}), + _any_id_ai_message_chunk(content="", additional_kwargs={"bar": 24}), + _any_id_ai_message_chunk(content="", chunk_position="last"), ] assert len({chunk.id for chunk in chunks}) == 1 @@ -115,37 +99,24 @@ async def test_generic_fake_chat_model_stream() -> None: assert chunks == [ _any_id_ai_message_chunk( content="", - additional_kwargs={ - "function_call": {"name": "move_file"}, - }, - response_metadata={}, + additional_kwargs={"function_call": {"name": "move_file"}}, ), _any_id_ai_message_chunk( content="", additional_kwargs={ "function_call": {"arguments": '{\n "source_path": "foo"'}, }, - response_metadata={}, ), _any_id_ai_message_chunk( - content="", - additional_kwargs={ - 
"function_call": {"arguments": ","}, - }, - response_metadata={}, + content="", additional_kwargs={"function_call": {"arguments": ","}} ), _any_id_ai_message_chunk( content="", additional_kwargs={ "function_call": {"arguments": '\n "destination_path": "bar"\n}'}, }, - response_metadata={}, - ), - _any_id_ai_message_chunk( - content="", - chunk_position="last", - response_metadata={}, ), + _any_id_ai_message_chunk(content="", chunk_position="last"), ] assert len({chunk.id for chunk in chunks}) == 1 @@ -163,9 +134,8 @@ async def test_generic_fake_chat_model_stream() -> None: "name": "move_file", "arguments": '{\n "source_path": "foo",\n "' 'destination_path": "bar"\n}', - }, + } }, - response_metadata={}, id=chunks[0].id, chunk_position="last", ) @@ -180,13 +150,9 @@ async def test_generic_fake_chat_model_astream_log() -> None: ] final = log_patches[-1] assert final.state["streamed_output"] == [ - _any_id_ai_message_chunk(content="hello", response_metadata={}), - _any_id_ai_message_chunk(content=" ", response_metadata={}), - _any_id_ai_message_chunk( - content="goodbye", - chunk_position="last", - response_metadata={}, - ), + _any_id_ai_message_chunk(content="hello"), + _any_id_ai_message_chunk(content=" "), + _any_id_ai_message_chunk(content="goodbye", chunk_position="last"), ] assert len({chunk.id for chunk in final.state["streamed_output"]}) == 1 @@ -241,13 +207,9 @@ async def on_llm_new_token( ) ] assert results == [ - _any_id_ai_message_chunk(content="hello", response_metadata={}), - _any_id_ai_message_chunk(content=" ", response_metadata={}), - _any_id_ai_message_chunk( - content="goodbye", - chunk_position="last", - response_metadata={}, - ), + _any_id_ai_message_chunk(content="hello"), + _any_id_ai_message_chunk(content=" "), + _any_id_ai_message_chunk(content="goodbye", chunk_position="last"), ] assert tokens == ["hello", " ", "goodbye"] assert len({chunk.id for chunk in results}) == 1 @@ -257,21 +219,19 @@ def test_chat_model_inputs() -> None: fake = ParrotFakeChatModel() assert cast("HumanMessage", fake.invoke("hello")) == _any_id_human_message( - content="hello", response_metadata={} - ) - assert fake.invoke([("ai", "blah")]) == _any_id_ai_message( - content="blah", response_metadata={} + content="hello" ) + assert fake.invoke([("ai", "blah")]) == _any_id_ai_message(content="blah") assert fake.invoke([AIMessage(content="blah")]) == _any_id_ai_message( - content="blah", response_metadata={} + content="blah" ) def test_fake_list_chat_model_batch() -> None: expected = [ - _any_id_ai_message(content="a", response_metadata={}), - _any_id_ai_message(content="b", response_metadata={}), - _any_id_ai_message(content="c", response_metadata={}), + _any_id_ai_message(content="a"), + _any_id_ai_message(content="b"), + _any_id_ai_message(content="c"), ] for _ in range(20): # run this 20 times to test race condition in batch diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index ce5447aaacd60..191b3ea947bda 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -1247,9 +1247,10 @@ def _get_request_payload( if stop is not None: kwargs["stop"] = stop - # Filter out parameters that shouldn't be passed to OpenAI API - filtered_kwargs = {k: v for k, v in kwargs.items() if k != "output_version"} - payload = {**self._default_params, **filtered_kwargs} + # # Filter out parameters that shouldn't be passed to OpenAI API + # filtered_kwargs = 
{k: v for k, v in kwargs.items() if k != "output_version"} + # payload = {**self._default_params, **filtered_kwargs} + payload = {**self._default_params, **kwargs} if self._use_responses_api(payload): if self.use_previous_response_id: From 313f5f243819d5756905f47a15e10a5ca54964e3 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Tue, 26 Aug 2025 20:25:47 -0400 Subject: [PATCH 68/73] . --- libs/partners/openai/langchain_openai/chat_models/base.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index 191b3ea947bda..c45dada48ee5f 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -1247,9 +1247,6 @@ def _get_request_payload( if stop is not None: kwargs["stop"] = stop - # # Filter out parameters that shouldn't be passed to OpenAI API - # filtered_kwargs = {k: v for k, v in kwargs.items() if k != "output_version"} - # payload = {**self._default_params, **filtered_kwargs} payload = {**self._default_params, **kwargs} if self._use_responses_api(payload): From 419450df98bd031d257b706e3ce7191c617ce483 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Tue, 26 Aug 2025 21:53:42 -0400 Subject: [PATCH 69/73] . --- .../language_models/chat_models.py | 7 +-- .../chat_models/test_output_version.py | 53 ++++++++++++++----- 2 files changed, 40 insertions(+), 20 deletions(-) diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py index 83472227278e6..2ad7ae04e1c58 100644 --- a/libs/core/langchain_core/language_models/chat_models.py +++ b/libs/core/langchain_core/language_models/chat_models.py @@ -1377,14 +1377,12 @@ def _generate_with_cache( messages, stop=stop, run_manager=run_manager, - output_version=output_version, **filtered_kwargs, ) else: result = self._generate( messages, stop=stop, - output_version=output_version, **filtered_kwargs, ) @@ -1516,7 +1514,6 @@ async def _agenerate_with_cache( messages, stop=stop, run_manager=run_manager, - output_version=output_version, **filtered_kwargs, ) else: @@ -1526,9 +1523,7 @@ async def _agenerate_with_cache( for k, v in kwargs.items() if k not in ("_output_version", "_output_version_explicit") } - result = await self._agenerate( - messages, stop=stop, output_version=output_version, **filtered_kwargs - ) + result = await self._agenerate(messages, stop=stop, **filtered_kwargs) if output_version == "v1": # Overwrite .content with .content_blocks diff --git a/libs/core/tests/unit_tests/language_models/chat_models/test_output_version.py b/libs/core/tests/unit_tests/language_models/chat_models/test_output_version.py index fcea6e3ba561a..6d234600e7202 100644 --- a/libs/core/tests/unit_tests/language_models/chat_models/test_output_version.py +++ b/libs/core/tests/unit_tests/language_models/chat_models/test_output_version.py @@ -79,10 +79,10 @@ class TestOutputVersionPassing: @pytest.mark.parametrize( ("method_name", "default_version", "provided_version", "expected_version"), [ - # Test invoke - ("invoke", "v1", None, "v1"), # Uses default when not provided - ("invoke", "v0", "v1", "v1"), # Uses provided version - # Test stream + # Test invoke - output_version is no longer passed to _generate methods + ("invoke", "v1", None, "v0"), # Always defaults to v0 in _generate + ("invoke", "v0", "v1", "v0"), # Always defaults to v0 in _generate + # Test stream - output_version is still passed to 
_stream methods ("stream", "v1", None, "v1"), # Uses default when not provided ("stream", "v1", "v2", "v2"), # Uses provided version ], @@ -116,10 +116,10 @@ def test_sync_methods_output_version( @pytest.mark.parametrize( ("method_name", "default_version", "provided_version", "expected_version"), [ - # Test ainvoke - ("ainvoke", "v1", None, "v1"), # Uses default when not provided - ("ainvoke", "v0", "v1", "v1"), # Uses provided version - # Test astream + # Test ainvoke - output_version is no longer passed to _generate methods + ("ainvoke", "v1", None, "v0"), # Always defaults to v0 in _generate + ("ainvoke", "v0", "v1", "v0"), # Always defaults to v0 in _generate + # Test astream - output_version is still passed to _stream methods ("astream", "v1", None, "v1"), # Uses default when not provided ("astream", "v1", "v0", "v0"), # Uses provided version ], @@ -186,9 +186,10 @@ def _llm_type(self) -> str: return "no-stream-model" model = NoStreamModel(output_version="v1") - # Stream should fallback to invoke and pass the output_version + # Stream should fallback to invoke but output_version is no longer + # passed to _generate list(model.stream(messages, output_version="v2")) - assert model.last_output_version == "v2" + assert model.last_output_version == "v0" # _generate always gets v0 default async def test_astream_fallback_to_ainvoke_passes_output_version( self, @@ -220,10 +221,11 @@ def _llm_type(self) -> str: return "no-stream-model" model = NoStreamModel(output_version="v1") - # astream should fallback to ainvoke and pass the output_version + # astream should fallback to ainvoke but output_version is no longer + # passed to _generate async for _ in model.astream(messages, output_version="v2"): pass - assert model.last_output_version == "v2" + assert model.last_output_version == "v0" # _generate always gets v0 default class TestOutputVersionInMessages: @@ -276,6 +278,28 @@ async def test_output_version_added_to_astream_message_response_metadata( assert "output_version" in chunk.response_metadata assert chunk.response_metadata["output_version"] == "v2" + def test_no_output_version_anywhere_no_metadata( + self, + messages: list[BaseMessage], + ) -> None: + """Test that when no output_version is set, no metadata is added.""" + from itertools import cycle + + # Test invoke + model = GenericFakeChatModel(messages=cycle([AIMessage(content="hello")])) + result = model.invoke(messages) + assert result.response_metadata == {} + assert isinstance(result.content, str) # Should be v0 behavior + + # Test stream + model_stream = GenericFakeChatModel( + messages=cycle([AIMessage(content="hello")]) + ) + chunks = list(model_stream.stream(messages)) + content_chunks = [chunk for chunk in chunks if chunk.content] + for chunk in content_chunks: + assert chunk.response_metadata == {} + class TestOutputVersionMerging: """Test output_version handling in merge operations.""" @@ -337,5 +361,6 @@ def test_output_version_preserved_through_chain_calls( call_kwargs = mock_cache.call_args[1] assert call_kwargs.get("_output_version") == "v2" - # Verify the model implementation received the correct output_version - assert model.last_output_version == "v2" + # Verify the model implementation received the default output_version + # (not the explicit one) + assert model.last_output_version == "v0" From 624afb174ea7cb66992ef1ffa5b95638dc427086 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Tue, 26 Aug 2025 21:56:49 -0400 Subject: [PATCH 70/73] ss --- .../__snapshots__/test_runnable.ambr | 453 ++++++++++++++++++ 1 file 
changed, 453 insertions(+) diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr index 910c21479a870..fe1b5884ed626 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr @@ -1962,7 +1962,9 @@ 'additionalProperties': True, 'description': ''' Message from an AI. + AIMessage is returned from a chat model as a response to a prompt. + This message represents the output of the model and consists of both the raw output as returned by the model together standardized fields (e.g., tool calls, usage metadata) added by the LangChain framework. @@ -2343,8 +2345,10 @@ 'additionalProperties': True, 'description': ''' Message for passing the result of executing a tool back to a model. + FunctionMessage are an older version of the ToolMessage schema, and do not contain the tool_call_id field. + The tool_call_id field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. @@ -2474,10 +2478,15 @@ 'additionalProperties': True, 'description': ''' Message from a human. + HumanMessages are messages that are passed in from a human to the model. + Example: + .. code-block:: python + from langchain_core.messages import HumanMessage, SystemMessage + messages = [ SystemMessage( content="You are a helpful assistant! Your name is Bob." @@ -2486,6 +2495,7 @@ content="What is your name?" ) ] + # Instantiate a chat model and invoke it with the messages model = ... print(model.invoke(messages)) @@ -2638,15 +2648,21 @@ 'InputTokenDetails': dict({ 'description': ''' Breakdown of input token counts. + Does *not* need to sum to full input token count. Does *not* need to have all keys. + Example: + .. code-block:: python + { "audio": 10, "cache_creation": 200, "cache_read": 100, } + .. versionadded:: 0.3.9 + May also hold extra provider-specific keys. ''', 'properties': dict({ @@ -2669,6 +2685,7 @@ 'InvalidToolCall': dict({ 'description': ''' Allowance for errors made by LLM. + Here we add an ``error`` key to surface errors made during generation (e.g., invalid JSON arguments.) ''', @@ -2750,13 +2767,18 @@ 'OutputTokenDetails': dict({ 'description': ''' Breakdown of output token counts. + Does *not* need to sum to full output token count. Does *not* need to have all keys. + Example: + .. code-block:: python + { "audio": 10, "reasoning": 200, } + .. versionadded:: 0.3.9 ''', 'properties': dict({ @@ -2776,11 +2798,16 @@ 'additionalProperties': True, 'description': ''' Message for priming AI behavior. + The system message is usually passed in as the first of a sequence of input messages. + Example: + .. code-block:: python + from langchain_core.messages import HumanMessage, SystemMessage + messages = [ SystemMessage( content="You are a helpful assistant! Your name is Bob." @@ -2789,6 +2816,7 @@ content="What is your name?" ) ] + # Define a chat model and invoke it with the messages print(model.invoke(messages)) ''', @@ -2930,13 +2958,17 @@ 'ToolCall': dict({ 'description': ''' Represents a request to call a tool. + Example: + .. code-block:: python + { "name": "foo", "args": {"a": 1}, "id": "123" } + This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". 
''', @@ -2976,13 +3008,18 @@ 'ToolCallChunk': dict({ 'description': ''' A chunk of a tool call (e.g., as part of a stream). + When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), all string attributes are concatenated. Chunks are only merged if their values of `index` are equal and not None. + Example: + .. code-block:: python + left_chunks = [ToolCallChunk(name="foo", args='{"a":', index=0)] right_chunks = [ToolCallChunk(name=None, args='1}', index=0)] + ( AIMessageChunk(content="", tool_call_chunks=left_chunks) + AIMessageChunk(content="", tool_call_chunks=right_chunks) @@ -3051,27 +3088,40 @@ 'additionalProperties': True, 'description': ''' Message for passing the result of executing a tool back to a model. + ToolMessages contain the result of a tool invocation. Typically, the result is encoded inside the `content` field. + Example: A ToolMessage representing a result of 42 from a tool call with id + .. code-block:: python + from langchain_core.messages import ToolMessage + ToolMessage(content='42', tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL') + + Example: A ToolMessage where only part of the tool output is sent to the model and the full output is passed in to artifact. + .. versionadded:: 0.2.17 + .. code-block:: python + from langchain_core.messages import ToolMessage + tool_output = { "stdout": "From the graph we can see that the correlation between x and y is ...", "stderr": None, "artifacts": {"type": "image", "base64_data": "/9j/4gIcSU..."}, } + ToolMessage( content=tool_output["stdout"], artifact=tool_output, tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL', ) + The tool_call_id field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. @@ -3238,9 +3288,13 @@ 'UsageMetadata': dict({ 'description': ''' Usage metadata for a message, such as token counts. + This is a standard representation of token usage that is consistent across models. + Example: + .. code-block:: python + { "input_tokens": 350, "output_tokens": 240, @@ -3255,7 +3309,9 @@ "reasoning": 200, } } + .. versionchanged:: 0.3.9 + Added ``input_token_details`` and ``output_token_details``. ''', 'properties': dict({ @@ -3347,7 +3403,9 @@ 'additionalProperties': True, 'description': ''' Message from an AI. + AIMessage is returned from a chat model as a response to a prompt. + This message represents the output of the model and consists of both the raw output as returned by the model together standardized fields (e.g., tool calls, usage metadata) added by the LangChain framework. @@ -3727,6 +3785,7 @@ 'ChatPromptValueConcrete': dict({ 'description': ''' Chat prompt value which explicitly lists out the message types it accepts. + For use in external schemas. ''', 'properties': dict({ @@ -3790,8 +3849,10 @@ 'additionalProperties': True, 'description': ''' Message for passing the result of executing a tool back to a model. + FunctionMessage are an older version of the ToolMessage schema, and do not contain the tool_call_id field. + The tool_call_id field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. @@ -3921,10 +3982,15 @@ 'additionalProperties': True, 'description': ''' Message from a human. + HumanMessages are messages that are passed in from a human to the model. + Example: + .. 
code-block:: python + from langchain_core.messages import HumanMessage, SystemMessage + messages = [ SystemMessage( content="You are a helpful assistant! Your name is Bob." @@ -3933,6 +3999,7 @@ content="What is your name?" ) ] + # Instantiate a chat model and invoke it with the messages model = ... print(model.invoke(messages)) @@ -4085,15 +4152,21 @@ 'InputTokenDetails': dict({ 'description': ''' Breakdown of input token counts. + Does *not* need to sum to full input token count. Does *not* need to have all keys. + Example: + .. code-block:: python + { "audio": 10, "cache_creation": 200, "cache_read": 100, } + .. versionadded:: 0.3.9 + May also hold extra provider-specific keys. ''', 'properties': dict({ @@ -4116,6 +4189,7 @@ 'InvalidToolCall': dict({ 'description': ''' Allowance for errors made by LLM. + Here we add an ``error`` key to surface errors made during generation (e.g., invalid JSON arguments.) ''', @@ -4197,13 +4271,18 @@ 'OutputTokenDetails': dict({ 'description': ''' Breakdown of output token counts. + Does *not* need to sum to full output token count. Does *not* need to have all keys. + Example: + .. code-block:: python + { "audio": 10, "reasoning": 200, } + .. versionadded:: 0.3.9 ''', 'properties': dict({ @@ -4242,11 +4321,16 @@ 'additionalProperties': True, 'description': ''' Message for priming AI behavior. + The system message is usually passed in as the first of a sequence of input messages. + Example: + .. code-block:: python + from langchain_core.messages import HumanMessage, SystemMessage + messages = [ SystemMessage( content="You are a helpful assistant! Your name is Bob." @@ -4255,6 +4339,7 @@ content="What is your name?" ) ] + # Define a chat model and invoke it with the messages print(model.invoke(messages)) ''', @@ -4396,13 +4481,17 @@ 'ToolCall': dict({ 'description': ''' Represents a request to call a tool. + Example: + .. code-block:: python + { "name": "foo", "args": {"a": 1}, "id": "123" } + This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". ''', @@ -4442,13 +4531,18 @@ 'ToolCallChunk': dict({ 'description': ''' A chunk of a tool call (e.g., as part of a stream). + When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), all string attributes are concatenated. Chunks are only merged if their values of `index` are equal and not None. + Example: + .. code-block:: python + left_chunks = [ToolCallChunk(name="foo", args='{"a":', index=0)] right_chunks = [ToolCallChunk(name=None, args='1}', index=0)] + ( AIMessageChunk(content="", tool_call_chunks=left_chunks) + AIMessageChunk(content="", tool_call_chunks=right_chunks) @@ -4517,27 +4611,40 @@ 'additionalProperties': True, 'description': ''' Message for passing the result of executing a tool back to a model. + ToolMessages contain the result of a tool invocation. Typically, the result is encoded inside the `content` field. + Example: A ToolMessage representing a result of 42 from a tool call with id + .. code-block:: python + from langchain_core.messages import ToolMessage + ToolMessage(content='42', tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL') + + Example: A ToolMessage where only part of the tool output is sent to the model and the full output is passed in to artifact. + .. versionadded:: 0.2.17 + .. 
code-block:: python + from langchain_core.messages import ToolMessage + tool_output = { "stdout": "From the graph we can see that the correlation between x and y is ...", "stderr": None, "artifacts": {"type": "image", "base64_data": "/9j/4gIcSU..."}, } + ToolMessage( content=tool_output["stdout"], artifact=tool_output, tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL', ) + The tool_call_id field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. @@ -4704,9 +4811,13 @@ 'UsageMetadata': dict({ 'description': ''' Usage metadata for a message, such as token counts. + This is a standard representation of token usage that is consistent across models. + Example: + .. code-block:: python + { "input_tokens": 350, "output_tokens": 240, @@ -4721,7 +4832,9 @@ "reasoning": 200, } } + .. versionchanged:: 0.3.9 + Added ``input_token_details`` and ``output_token_details``. ''', 'properties': dict({ @@ -4825,7 +4938,9 @@ 'additionalProperties': True, 'description': ''' Message from an AI. + AIMessage is returned from a chat model as a response to a prompt. + This message represents the output of the model and consists of both the raw output as returned by the model together standardized fields (e.g., tool calls, usage metadata) added by the LangChain framework. @@ -5205,6 +5320,7 @@ 'ChatPromptValueConcrete': dict({ 'description': ''' Chat prompt value which explicitly lists out the message types it accepts. + For use in external schemas. ''', 'properties': dict({ @@ -5268,8 +5384,10 @@ 'additionalProperties': True, 'description': ''' Message for passing the result of executing a tool back to a model. + FunctionMessage are an older version of the ToolMessage schema, and do not contain the tool_call_id field. + The tool_call_id field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. @@ -5399,10 +5517,15 @@ 'additionalProperties': True, 'description': ''' Message from a human. + HumanMessages are messages that are passed in from a human to the model. + Example: + .. code-block:: python + from langchain_core.messages import HumanMessage, SystemMessage + messages = [ SystemMessage( content="You are a helpful assistant! Your name is Bob." @@ -5411,6 +5534,7 @@ content="What is your name?" ) ] + # Instantiate a chat model and invoke it with the messages model = ... print(model.invoke(messages)) @@ -5563,15 +5687,21 @@ 'InputTokenDetails': dict({ 'description': ''' Breakdown of input token counts. + Does *not* need to sum to full input token count. Does *not* need to have all keys. + Example: + .. code-block:: python + { "audio": 10, "cache_creation": 200, "cache_read": 100, } + .. versionadded:: 0.3.9 + May also hold extra provider-specific keys. ''', 'properties': dict({ @@ -5594,6 +5724,7 @@ 'InvalidToolCall': dict({ 'description': ''' Allowance for errors made by LLM. + Here we add an ``error`` key to surface errors made during generation (e.g., invalid JSON arguments.) ''', @@ -5675,13 +5806,18 @@ 'OutputTokenDetails': dict({ 'description': ''' Breakdown of output token counts. + Does *not* need to sum to full output token count. Does *not* need to have all keys. + Example: + .. code-block:: python + { "audio": 10, "reasoning": 200, } + .. 
versionadded:: 0.3.9 ''', 'properties': dict({ @@ -5720,11 +5856,16 @@ 'additionalProperties': True, 'description': ''' Message for priming AI behavior. + The system message is usually passed in as the first of a sequence of input messages. + Example: + .. code-block:: python + from langchain_core.messages import HumanMessage, SystemMessage + messages = [ SystemMessage( content="You are a helpful assistant! Your name is Bob." @@ -5733,6 +5874,7 @@ content="What is your name?" ) ] + # Define a chat model and invoke it with the messages print(model.invoke(messages)) ''', @@ -5874,13 +6016,17 @@ 'ToolCall': dict({ 'description': ''' Represents a request to call a tool. + Example: + .. code-block:: python + { "name": "foo", "args": {"a": 1}, "id": "123" } + This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". ''', @@ -5920,13 +6066,18 @@ 'ToolCallChunk': dict({ 'description': ''' A chunk of a tool call (e.g., as part of a stream). + When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), all string attributes are concatenated. Chunks are only merged if their values of `index` are equal and not None. + Example: + .. code-block:: python + left_chunks = [ToolCallChunk(name="foo", args='{"a":', index=0)] right_chunks = [ToolCallChunk(name=None, args='1}', index=0)] + ( AIMessageChunk(content="", tool_call_chunks=left_chunks) + AIMessageChunk(content="", tool_call_chunks=right_chunks) @@ -5995,27 +6146,40 @@ 'additionalProperties': True, 'description': ''' Message for passing the result of executing a tool back to a model. + ToolMessages contain the result of a tool invocation. Typically, the result is encoded inside the `content` field. + Example: A ToolMessage representing a result of 42 from a tool call with id + .. code-block:: python + from langchain_core.messages import ToolMessage + ToolMessage(content='42', tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL') + + Example: A ToolMessage where only part of the tool output is sent to the model and the full output is passed in to artifact. + .. versionadded:: 0.2.17 + .. code-block:: python + from langchain_core.messages import ToolMessage + tool_output = { "stdout": "From the graph we can see that the correlation between x and y is ...", "stderr": None, "artifacts": {"type": "image", "base64_data": "/9j/4gIcSU..."}, } + ToolMessage( content=tool_output["stdout"], artifact=tool_output, tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL', ) + The tool_call_id field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. @@ -6182,9 +6346,13 @@ 'UsageMetadata': dict({ 'description': ''' Usage metadata for a message, such as token counts. + This is a standard representation of token usage that is consistent across models. + Example: + .. code-block:: python + { "input_tokens": 350, "output_tokens": 240, @@ -6199,7 +6367,9 @@ "reasoning": 200, } } + .. versionchanged:: 0.3.9 + Added ``input_token_details`` and ``output_token_details``. ''', 'properties': dict({ @@ -6241,7 +6411,9 @@ 'additionalProperties': True, 'description': ''' Message from an AI. + AIMessage is returned from a chat model as a response to a prompt. + This message represents the output of the model and consists of both the raw output as returned by the model together standardized fields (e.g., tool calls, usage metadata) added by the LangChain framework. 
@@ -6622,8 +6794,10 @@ 'additionalProperties': True, 'description': ''' Message for passing the result of executing a tool back to a model. + FunctionMessage are an older version of the ToolMessage schema, and do not contain the tool_call_id field. + The tool_call_id field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. @@ -6753,10 +6927,15 @@ 'additionalProperties': True, 'description': ''' Message from a human. + HumanMessages are messages that are passed in from a human to the model. + Example: + .. code-block:: python + from langchain_core.messages import HumanMessage, SystemMessage + messages = [ SystemMessage( content="You are a helpful assistant! Your name is Bob." @@ -6765,6 +6944,7 @@ content="What is your name?" ) ] + # Instantiate a chat model and invoke it with the messages model = ... print(model.invoke(messages)) @@ -6917,15 +7097,21 @@ 'InputTokenDetails': dict({ 'description': ''' Breakdown of input token counts. + Does *not* need to sum to full input token count. Does *not* need to have all keys. + Example: + .. code-block:: python + { "audio": 10, "cache_creation": 200, "cache_read": 100, } + .. versionadded:: 0.3.9 + May also hold extra provider-specific keys. ''', 'properties': dict({ @@ -6948,6 +7134,7 @@ 'InvalidToolCall': dict({ 'description': ''' Allowance for errors made by LLM. + Here we add an ``error`` key to surface errors made during generation (e.g., invalid JSON arguments.) ''', @@ -7029,13 +7216,18 @@ 'OutputTokenDetails': dict({ 'description': ''' Breakdown of output token counts. + Does *not* need to sum to full output token count. Does *not* need to have all keys. + Example: + .. code-block:: python + { "audio": 10, "reasoning": 200, } + .. versionadded:: 0.3.9 ''', 'properties': dict({ @@ -7055,11 +7247,16 @@ 'additionalProperties': True, 'description': ''' Message for priming AI behavior. + The system message is usually passed in as the first of a sequence of input messages. + Example: + .. code-block:: python + from langchain_core.messages import HumanMessage, SystemMessage + messages = [ SystemMessage( content="You are a helpful assistant! Your name is Bob." @@ -7068,6 +7265,7 @@ content="What is your name?" ) ] + # Define a chat model and invoke it with the messages print(model.invoke(messages)) ''', @@ -7209,13 +7407,17 @@ 'ToolCall': dict({ 'description': ''' Represents a request to call a tool. + Example: + .. code-block:: python + { "name": "foo", "args": {"a": 1}, "id": "123" } + This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". ''', @@ -7255,13 +7457,18 @@ 'ToolCallChunk': dict({ 'description': ''' A chunk of a tool call (e.g., as part of a stream). + When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), all string attributes are concatenated. Chunks are only merged if their values of `index` are equal and not None. + Example: + .. code-block:: python + left_chunks = [ToolCallChunk(name="foo", args='{"a":', index=0)] right_chunks = [ToolCallChunk(name=None, args='1}', index=0)] + ( AIMessageChunk(content="", tool_call_chunks=left_chunks) + AIMessageChunk(content="", tool_call_chunks=right_chunks) @@ -7330,27 +7537,40 @@ 'additionalProperties': True, 'description': ''' Message for passing the result of executing a tool back to a model. + ToolMessages contain the result of a tool invocation. Typically, the result is encoded inside the `content` field. 
+ Example: A ToolMessage representing a result of 42 from a tool call with id + .. code-block:: python + from langchain_core.messages import ToolMessage + ToolMessage(content='42', tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL') + + Example: A ToolMessage where only part of the tool output is sent to the model and the full output is passed in to artifact. + .. versionadded:: 0.2.17 + .. code-block:: python + from langchain_core.messages import ToolMessage + tool_output = { "stdout": "From the graph we can see that the correlation between x and y is ...", "stderr": None, "artifacts": {"type": "image", "base64_data": "/9j/4gIcSU..."}, } + ToolMessage( content=tool_output["stdout"], artifact=tool_output, tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL', ) + The tool_call_id field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. @@ -7517,9 +7737,13 @@ 'UsageMetadata': dict({ 'description': ''' Usage metadata for a message, such as token counts. + This is a standard representation of token usage that is consistent across models. + Example: + .. code-block:: python + { "input_tokens": 350, "output_tokens": 240, @@ -7534,7 +7758,9 @@ "reasoning": 200, } } + .. versionchanged:: 0.3.9 + Added ``input_token_details`` and ``output_token_details``. ''', 'properties': dict({ @@ -7668,7 +7894,9 @@ 'additionalProperties': True, 'description': ''' Message from an AI. + AIMessage is returned from a chat model as a response to a prompt. + This message represents the output of the model and consists of both the raw output as returned by the model together standardized fields (e.g., tool calls, usage metadata) added by the LangChain framework. @@ -8048,6 +8276,7 @@ 'ChatPromptValueConcrete': dict({ 'description': ''' Chat prompt value which explicitly lists out the message types it accepts. + For use in external schemas. ''', 'properties': dict({ @@ -8111,8 +8340,10 @@ 'additionalProperties': True, 'description': ''' Message for passing the result of executing a tool back to a model. + FunctionMessage are an older version of the ToolMessage schema, and do not contain the tool_call_id field. + The tool_call_id field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. @@ -8242,10 +8473,15 @@ 'additionalProperties': True, 'description': ''' Message from a human. + HumanMessages are messages that are passed in from a human to the model. + Example: + .. code-block:: python + from langchain_core.messages import HumanMessage, SystemMessage + messages = [ SystemMessage( content="You are a helpful assistant! Your name is Bob." @@ -8254,6 +8490,7 @@ content="What is your name?" ) ] + # Instantiate a chat model and invoke it with the messages model = ... print(model.invoke(messages)) @@ -8406,15 +8643,21 @@ 'InputTokenDetails': dict({ 'description': ''' Breakdown of input token counts. + Does *not* need to sum to full input token count. Does *not* need to have all keys. + Example: + .. code-block:: python + { "audio": 10, "cache_creation": 200, "cache_read": 100, } + .. versionadded:: 0.3.9 + May also hold extra provider-specific keys. ''', 'properties': dict({ @@ -8437,6 +8680,7 @@ 'InvalidToolCall': dict({ 'description': ''' Allowance for errors made by LLM. + Here we add an ``error`` key to surface errors made during generation (e.g., invalid JSON arguments.) 
''', @@ -8518,13 +8762,18 @@ 'OutputTokenDetails': dict({ 'description': ''' Breakdown of output token counts. + Does *not* need to sum to full output token count. Does *not* need to have all keys. + Example: + .. code-block:: python + { "audio": 10, "reasoning": 200, } + .. versionadded:: 0.3.9 ''', 'properties': dict({ @@ -8563,11 +8812,16 @@ 'additionalProperties': True, 'description': ''' Message for priming AI behavior. + The system message is usually passed in as the first of a sequence of input messages. + Example: + .. code-block:: python + from langchain_core.messages import HumanMessage, SystemMessage + messages = [ SystemMessage( content="You are a helpful assistant! Your name is Bob." @@ -8576,6 +8830,7 @@ content="What is your name?" ) ] + # Define a chat model and invoke it with the messages print(model.invoke(messages)) ''', @@ -8717,13 +8972,17 @@ 'ToolCall': dict({ 'description': ''' Represents a request to call a tool. + Example: + .. code-block:: python + { "name": "foo", "args": {"a": 1}, "id": "123" } + This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". ''', @@ -8763,13 +9022,18 @@ 'ToolCallChunk': dict({ 'description': ''' A chunk of a tool call (e.g., as part of a stream). + When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), all string attributes are concatenated. Chunks are only merged if their values of `index` are equal and not None. + Example: + .. code-block:: python + left_chunks = [ToolCallChunk(name="foo", args='{"a":', index=0)] right_chunks = [ToolCallChunk(name=None, args='1}', index=0)] + ( AIMessageChunk(content="", tool_call_chunks=left_chunks) + AIMessageChunk(content="", tool_call_chunks=right_chunks) @@ -8838,27 +9102,40 @@ 'additionalProperties': True, 'description': ''' Message for passing the result of executing a tool back to a model. + ToolMessages contain the result of a tool invocation. Typically, the result is encoded inside the `content` field. + Example: A ToolMessage representing a result of 42 from a tool call with id + .. code-block:: python + from langchain_core.messages import ToolMessage + ToolMessage(content='42', tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL') + + Example: A ToolMessage where only part of the tool output is sent to the model and the full output is passed in to artifact. + .. versionadded:: 0.2.17 + .. code-block:: python + from langchain_core.messages import ToolMessage + tool_output = { "stdout": "From the graph we can see that the correlation between x and y is ...", "stderr": None, "artifacts": {"type": "image", "base64_data": "/9j/4gIcSU..."}, } + ToolMessage( content=tool_output["stdout"], artifact=tool_output, tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL', ) + The tool_call_id field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. @@ -9025,9 +9302,13 @@ 'UsageMetadata': dict({ 'description': ''' Usage metadata for a message, such as token counts. + This is a standard representation of token usage that is consistent across models. + Example: + .. code-block:: python + { "input_tokens": 350, "output_tokens": 240, @@ -9042,7 +9323,9 @@ "reasoning": 200, } } + .. versionchanged:: 0.3.9 + Added ``input_token_details`` and ``output_token_details``. ''', 'properties': dict({ @@ -9129,7 +9412,9 @@ 'additionalProperties': True, 'description': ''' Message from an AI. + AIMessage is returned from a chat model as a response to a prompt. 
+ This message represents the output of the model and consists of both the raw output as returned by the model together standardized fields (e.g., tool calls, usage metadata) added by the LangChain framework. @@ -9510,8 +9795,10 @@ 'additionalProperties': True, 'description': ''' Message for passing the result of executing a tool back to a model. + FunctionMessage are an older version of the ToolMessage schema, and do not contain the tool_call_id field. + The tool_call_id field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. @@ -9641,10 +9928,15 @@ 'additionalProperties': True, 'description': ''' Message from a human. + HumanMessages are messages that are passed in from a human to the model. + Example: + .. code-block:: python + from langchain_core.messages import HumanMessage, SystemMessage + messages = [ SystemMessage( content="You are a helpful assistant! Your name is Bob." @@ -9653,6 +9945,7 @@ content="What is your name?" ) ] + # Instantiate a chat model and invoke it with the messages model = ... print(model.invoke(messages)) @@ -9805,15 +10098,21 @@ 'InputTokenDetails': dict({ 'description': ''' Breakdown of input token counts. + Does *not* need to sum to full input token count. Does *not* need to have all keys. + Example: + .. code-block:: python + { "audio": 10, "cache_creation": 200, "cache_read": 100, } + .. versionadded:: 0.3.9 + May also hold extra provider-specific keys. ''', 'properties': dict({ @@ -9836,6 +10135,7 @@ 'InvalidToolCall': dict({ 'description': ''' Allowance for errors made by LLM. + Here we add an ``error`` key to surface errors made during generation (e.g., invalid JSON arguments.) ''', @@ -9917,13 +10217,18 @@ 'OutputTokenDetails': dict({ 'description': ''' Breakdown of output token counts. + Does *not* need to sum to full output token count. Does *not* need to have all keys. + Example: + .. code-block:: python + { "audio": 10, "reasoning": 200, } + .. versionadded:: 0.3.9 ''', 'properties': dict({ @@ -9943,11 +10248,16 @@ 'additionalProperties': True, 'description': ''' Message for priming AI behavior. + The system message is usually passed in as the first of a sequence of input messages. + Example: + .. code-block:: python + from langchain_core.messages import HumanMessage, SystemMessage + messages = [ SystemMessage( content="You are a helpful assistant! Your name is Bob." @@ -9956,6 +10266,7 @@ content="What is your name?" ) ] + # Define a chat model and invoke it with the messages print(model.invoke(messages)) ''', @@ -10097,13 +10408,17 @@ 'ToolCall': dict({ 'description': ''' Represents a request to call a tool. + Example: + .. code-block:: python + { "name": "foo", "args": {"a": 1}, "id": "123" } + This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". ''', @@ -10143,13 +10458,18 @@ 'ToolCallChunk': dict({ 'description': ''' A chunk of a tool call (e.g., as part of a stream). + When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), all string attributes are concatenated. Chunks are only merged if their values of `index` are equal and not None. + Example: + .. 
code-block:: python + left_chunks = [ToolCallChunk(name="foo", args='{"a":', index=0)] right_chunks = [ToolCallChunk(name=None, args='1}', index=0)] + ( AIMessageChunk(content="", tool_call_chunks=left_chunks) + AIMessageChunk(content="", tool_call_chunks=right_chunks) @@ -10218,27 +10538,40 @@ 'additionalProperties': True, 'description': ''' Message for passing the result of executing a tool back to a model. + ToolMessages contain the result of a tool invocation. Typically, the result is encoded inside the `content` field. + Example: A ToolMessage representing a result of 42 from a tool call with id + .. code-block:: python + from langchain_core.messages import ToolMessage + ToolMessage(content='42', tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL') + + Example: A ToolMessage where only part of the tool output is sent to the model and the full output is passed in to artifact. + .. versionadded:: 0.2.17 + .. code-block:: python + from langchain_core.messages import ToolMessage + tool_output = { "stdout": "From the graph we can see that the correlation between x and y is ...", "stderr": None, "artifacts": {"type": "image", "base64_data": "/9j/4gIcSU..."}, } + ToolMessage( content=tool_output["stdout"], artifact=tool_output, tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL', ) + The tool_call_id field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. @@ -10405,9 +10738,13 @@ 'UsageMetadata': dict({ 'description': ''' Usage metadata for a message, such as token counts. + This is a standard representation of token usage that is consistent across models. + Example: + .. code-block:: python + { "input_tokens": 350, "output_tokens": 240, @@ -10422,7 +10759,9 @@ "reasoning": 200, } } + .. versionchanged:: 0.3.9 + Added ``input_token_details`` and ``output_token_details``. ''', 'properties': dict({ @@ -10464,7 +10803,9 @@ 'additionalProperties': True, 'description': ''' Message from an AI. + AIMessage is returned from a chat model as a response to a prompt. + This message represents the output of the model and consists of both the raw output as returned by the model together standardized fields (e.g., tool calls, usage metadata) added by the LangChain framework. @@ -10844,6 +11185,7 @@ 'ChatPromptValueConcrete': dict({ 'description': ''' Chat prompt value which explicitly lists out the message types it accepts. + For use in external schemas. ''', 'properties': dict({ @@ -10907,8 +11249,10 @@ 'additionalProperties': True, 'description': ''' Message for passing the result of executing a tool back to a model. + FunctionMessage are an older version of the ToolMessage schema, and do not contain the tool_call_id field. + The tool_call_id field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. @@ -11038,10 +11382,15 @@ 'additionalProperties': True, 'description': ''' Message from a human. + HumanMessages are messages that are passed in from a human to the model. + Example: + .. code-block:: python + from langchain_core.messages import HumanMessage, SystemMessage + messages = [ SystemMessage( content="You are a helpful assistant! Your name is Bob." @@ -11050,6 +11399,7 @@ content="What is your name?" ) ] + # Instantiate a chat model and invoke it with the messages model = ... 
print(model.invoke(messages)) @@ -11202,15 +11552,21 @@ 'InputTokenDetails': dict({ 'description': ''' Breakdown of input token counts. + Does *not* need to sum to full input token count. Does *not* need to have all keys. + Example: + .. code-block:: python + { "audio": 10, "cache_creation": 200, "cache_read": 100, } + .. versionadded:: 0.3.9 + May also hold extra provider-specific keys. ''', 'properties': dict({ @@ -11233,6 +11589,7 @@ 'InvalidToolCall': dict({ 'description': ''' Allowance for errors made by LLM. + Here we add an ``error`` key to surface errors made during generation (e.g., invalid JSON arguments.) ''', @@ -11314,13 +11671,18 @@ 'OutputTokenDetails': dict({ 'description': ''' Breakdown of output token counts. + Does *not* need to sum to full output token count. Does *not* need to have all keys. + Example: + .. code-block:: python + { "audio": 10, "reasoning": 200, } + .. versionadded:: 0.3.9 ''', 'properties': dict({ @@ -11370,11 +11732,16 @@ 'additionalProperties': True, 'description': ''' Message for priming AI behavior. + The system message is usually passed in as the first of a sequence of input messages. + Example: + .. code-block:: python + from langchain_core.messages import HumanMessage, SystemMessage + messages = [ SystemMessage( content="You are a helpful assistant! Your name is Bob." @@ -11383,6 +11750,7 @@ content="What is your name?" ) ] + # Define a chat model and invoke it with the messages print(model.invoke(messages)) ''', @@ -11524,13 +11892,17 @@ 'ToolCall': dict({ 'description': ''' Represents a request to call a tool. + Example: + .. code-block:: python + { "name": "foo", "args": {"a": 1}, "id": "123" } + This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". ''', @@ -11570,13 +11942,18 @@ 'ToolCallChunk': dict({ 'description': ''' A chunk of a tool call (e.g., as part of a stream). + When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), all string attributes are concatenated. Chunks are only merged if their values of `index` are equal and not None. + Example: + .. code-block:: python + left_chunks = [ToolCallChunk(name="foo", args='{"a":', index=0)] right_chunks = [ToolCallChunk(name=None, args='1}', index=0)] + ( AIMessageChunk(content="", tool_call_chunks=left_chunks) + AIMessageChunk(content="", tool_call_chunks=right_chunks) @@ -11645,27 +12022,40 @@ 'additionalProperties': True, 'description': ''' Message for passing the result of executing a tool back to a model. + ToolMessages contain the result of a tool invocation. Typically, the result is encoded inside the `content` field. + Example: A ToolMessage representing a result of 42 from a tool call with id + .. code-block:: python + from langchain_core.messages import ToolMessage + ToolMessage(content='42', tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL') + + Example: A ToolMessage where only part of the tool output is sent to the model and the full output is passed in to artifact. + .. versionadded:: 0.2.17 + .. code-block:: python + from langchain_core.messages import ToolMessage + tool_output = { "stdout": "From the graph we can see that the correlation between x and y is ...", "stderr": None, "artifacts": {"type": "image", "base64_data": "/9j/4gIcSU..."}, } + ToolMessage( content=tool_output["stdout"], artifact=tool_output, tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL', ) + The tool_call_id field is used to associate the tool call request with the tool call response. 
This is useful in situations where a chat model is able to request multiple tool calls in parallel. @@ -11832,9 +12222,13 @@ 'UsageMetadata': dict({ 'description': ''' Usage metadata for a message, such as token counts. + This is a standard representation of token usage that is consistent across models. + Example: + .. code-block:: python + { "input_tokens": 350, "output_tokens": 240, @@ -11849,7 +12243,9 @@ "reasoning": 200, } } + .. versionchanged:: 0.3.9 + Added ``input_token_details`` and ``output_token_details``. ''', 'properties': dict({ @@ -11903,7 +12299,9 @@ 'additionalProperties': True, 'description': ''' Message from an AI. + AIMessage is returned from a chat model as a response to a prompt. + This message represents the output of the model and consists of both the raw output as returned by the model together standardized fields (e.g., tool calls, usage metadata) added by the LangChain framework. @@ -12283,6 +12681,7 @@ 'ChatPromptValueConcrete': dict({ 'description': ''' Chat prompt value which explicitly lists out the message types it accepts. + For use in external schemas. ''', 'properties': dict({ @@ -12346,8 +12745,10 @@ 'additionalProperties': True, 'description': ''' Message for passing the result of executing a tool back to a model. + FunctionMessage are an older version of the ToolMessage schema, and do not contain the tool_call_id field. + The tool_call_id field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. @@ -12477,10 +12878,15 @@ 'additionalProperties': True, 'description': ''' Message from a human. + HumanMessages are messages that are passed in from a human to the model. + Example: + .. code-block:: python + from langchain_core.messages import HumanMessage, SystemMessage + messages = [ SystemMessage( content="You are a helpful assistant! Your name is Bob." @@ -12489,6 +12895,7 @@ content="What is your name?" ) ] + # Instantiate a chat model and invoke it with the messages model = ... print(model.invoke(messages)) @@ -12641,15 +13048,21 @@ 'InputTokenDetails': dict({ 'description': ''' Breakdown of input token counts. + Does *not* need to sum to full input token count. Does *not* need to have all keys. + Example: + .. code-block:: python + { "audio": 10, "cache_creation": 200, "cache_read": 100, } + .. versionadded:: 0.3.9 + May also hold extra provider-specific keys. ''', 'properties': dict({ @@ -12672,6 +13085,7 @@ 'InvalidToolCall': dict({ 'description': ''' Allowance for errors made by LLM. + Here we add an ``error`` key to surface errors made during generation (e.g., invalid JSON arguments.) ''', @@ -12753,13 +13167,18 @@ 'OutputTokenDetails': dict({ 'description': ''' Breakdown of output token counts. + Does *not* need to sum to full output token count. Does *not* need to have all keys. + Example: + .. code-block:: python + { "audio": 10, "reasoning": 200, } + .. versionadded:: 0.3.9 ''', 'properties': dict({ @@ -12798,11 +13217,16 @@ 'additionalProperties': True, 'description': ''' Message for priming AI behavior. + The system message is usually passed in as the first of a sequence of input messages. + Example: + .. code-block:: python + from langchain_core.messages import HumanMessage, SystemMessage + messages = [ SystemMessage( content="You are a helpful assistant! Your name is Bob." @@ -12811,6 +13235,7 @@ content="What is your name?" 
) ] + # Define a chat model and invoke it with the messages print(model.invoke(messages)) ''', @@ -12952,13 +13377,17 @@ 'ToolCall': dict({ 'description': ''' Represents a request to call a tool. + Example: + .. code-block:: python + { "name": "foo", "args": {"a": 1}, "id": "123" } + This represents a request to call the tool named "foo" with arguments {"a": 1} and an identifier of "123". ''', @@ -12998,13 +13427,18 @@ 'ToolCallChunk': dict({ 'description': ''' A chunk of a tool call (e.g., as part of a stream). + When merging ToolCallChunks (e.g., via AIMessageChunk.__add__), all string attributes are concatenated. Chunks are only merged if their values of `index` are equal and not None. + Example: + .. code-block:: python + left_chunks = [ToolCallChunk(name="foo", args='{"a":', index=0)] right_chunks = [ToolCallChunk(name=None, args='1}', index=0)] + ( AIMessageChunk(content="", tool_call_chunks=left_chunks) + AIMessageChunk(content="", tool_call_chunks=right_chunks) @@ -13073,27 +13507,40 @@ 'additionalProperties': True, 'description': ''' Message for passing the result of executing a tool back to a model. + ToolMessages contain the result of a tool invocation. Typically, the result is encoded inside the `content` field. + Example: A ToolMessage representing a result of 42 from a tool call with id + .. code-block:: python + from langchain_core.messages import ToolMessage + ToolMessage(content='42', tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL') + + Example: A ToolMessage where only part of the tool output is sent to the model and the full output is passed in to artifact. + .. versionadded:: 0.2.17 + .. code-block:: python + from langchain_core.messages import ToolMessage + tool_output = { "stdout": "From the graph we can see that the correlation between x and y is ...", "stderr": None, "artifacts": {"type": "image", "base64_data": "/9j/4gIcSU..."}, } + ToolMessage( content=tool_output["stdout"], artifact=tool_output, tool_call_id='call_Jja7J89XsjrOLA5r!MEOW!SL', ) + The tool_call_id field is used to associate the tool call request with the tool call response. This is useful in situations where a chat model is able to request multiple tool calls in parallel. @@ -13260,9 +13707,13 @@ 'UsageMetadata': dict({ 'description': ''' Usage metadata for a message, such as token counts. + This is a standard representation of token usage that is consistent across models. + Example: + .. code-block:: python + { "input_tokens": 350, "output_tokens": 240, @@ -13277,7 +13728,9 @@ "reasoning": 200, } } + .. versionchanged:: 0.3.9 + Added ``input_token_details`` and ``output_token_details``. ''', 'properties': dict({ From 7f218e20812b1c0190b43a1c323ba2bd908b9bf4 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Tue, 26 Aug 2025 22:49:55 -0400 Subject: [PATCH 71/73] . 
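Thread request payloads through a new `_prepare_api_payload` helper so that the
LangChain-only `output_version` key is dropped before the payload reaches the
OpenAI client. A minimal standalone sketch of the filtering idea follows; the
helper name, key set, and example payload here are illustrative, while the
actual method in the diff below copies the payload and pops only
"output_version":

    from typing import Any

    # Keys that belong to LangChain, not to the OpenAI API (assumed set; the
    # patch below strips only "output_version").
    _LANGCHAIN_ONLY_KEYS = {"output_version"}

    def prepare_api_payload(payload: dict[str, Any]) -> dict[str, Any]:
        """Return a copy of the payload without LangChain-specific keys."""
        return {k: v for k, v in payload.items() if k not in _LANGCHAIN_ONLY_KEYS}

    payload = {"model": "gpt-4o-mini", "stream": True, "output_version": "v1"}
    assert "output_version" not in prepare_api_payload(payload)

The call sites changed in the diff below pass the filtered copy to the client
instead of the raw payload.
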
--- .../langchain_openai/chat_models/base.py | 61 +++++++++++++------ 1 file changed, 41 insertions(+), 20 deletions(-) diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index 0caa2cf015b23..189f264e5e52b 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -971,14 +971,15 @@ def _stream_responses( ) -> Iterator[ChatGenerationChunk]: kwargs["stream"] = True payload = self._get_request_payload(messages, stop=stop, **kwargs) + api_payload = self._prepare_api_payload(payload) if self.include_response_headers: raw_context_manager = self.root_client.with_raw_response.responses.create( - **payload + **api_payload ) context_manager = raw_context_manager.parse() headers = {"headers": dict(raw_context_manager.headers)} else: - context_manager = self.root_client.responses.create(**payload) + context_manager = self.root_client.responses.create(**api_payload) headers = {} original_schema_obj = kwargs.get("response_format") @@ -1024,16 +1025,19 @@ async def _astream_responses( ) -> AsyncIterator[ChatGenerationChunk]: kwargs["stream"] = True payload = self._get_request_payload(messages, stop=stop, **kwargs) + api_payload = self._prepare_api_payload(payload) if self.include_response_headers: raw_context_manager = ( await self.root_async_client.with_raw_response.responses.create( - **payload + **api_payload ) ) context_manager = raw_context_manager.parse() headers = {"headers": dict(raw_context_manager.headers)} else: - context_manager = await self.root_async_client.responses.create(**payload) + context_manager = await self.root_async_client.responses.create( + **api_payload + ) headers = {} original_schema_obj = kwargs.get("response_format") @@ -1112,16 +1116,20 @@ def _stream( "Cannot currently include response headers when response_format is " "specified." 
) - payload.pop("stream") - response_stream = self.root_client.beta.chat.completions.stream(**payload) + api_payload = self._prepare_api_payload(payload) + api_payload.pop("stream") + response_stream = self.root_client.beta.chat.completions.stream( + **api_payload + ) context_manager = response_stream else: + api_payload = self._prepare_api_payload(payload) if self.include_response_headers: - raw_response = self.client.with_raw_response.create(**payload) + raw_response = self.client.with_raw_response.create(**api_payload) response = raw_response.parse() base_generation_info = {"headers": dict(raw_response.headers)} else: - response = self.client.create(**payload) + response = self.client.create(**api_payload) context_manager = response try: with context_manager as response: @@ -1187,14 +1195,15 @@ def _generate( except openai.BadRequestError as e: _handle_openai_bad_request(e) elif self._use_responses_api(payload): + api_payload = self._prepare_api_payload(payload) original_schema_obj = kwargs.get("response_format") if original_schema_obj and _is_pydantic_class(original_schema_obj): raw_response = self.root_client.responses.with_raw_response.parse( - **payload + **api_payload ) else: raw_response = self.root_client.responses.with_raw_response.create( - **payload + **api_payload ) response = raw_response.parse() if self.include_response_headers: @@ -1206,7 +1215,8 @@ def _generate( output_version=self.output_version, ) else: - raw_response = self.client.with_raw_response.create(**payload) + api_payload = self._prepare_api_payload(payload) + raw_response = self.client.with_raw_response.create(**api_payload) response = raw_response.parse() except Exception as e: if raw_response is not None and hasattr(raw_response, "http_response"): @@ -1267,6 +1277,12 @@ def _get_request_payload( ] return payload + def _prepare_api_payload(self, payload: dict) -> dict: + """Remove LangChain-specific parameters before making OpenAI API calls.""" + api_payload = payload.copy() + api_payload.pop("output_version", None) + return api_payload + def _create_chat_result( self, response: Union[dict, openai.BaseModel], @@ -1356,20 +1372,22 @@ async def _astream( "Cannot currently include response headers when response_format is " "specified." 
             )
-            payload.pop("stream")
+            api_payload = self._prepare_api_payload(payload)
+            api_payload.pop("stream")
             response_stream = self.root_async_client.beta.chat.completions.stream(
-                **payload
+                **api_payload
             )
             context_manager = response_stream
         else:
+            api_payload = self._prepare_api_payload(payload)
             if self.include_response_headers:
                 raw_response = await self.async_client.with_raw_response.create(
-                    **payload
+                    **api_payload
                 )
                 response = raw_response.parse()
                 base_generation_info = {"headers": dict(raw_response.headers)}
             else:
-                response = await self.async_client.create(**payload)
+                response = await self.async_client.create(**api_payload)
             context_manager = response
         try:
             async with context_manager as response:
@@ -1424,26 +1442,28 @@ async def _agenerate(
         raw_response = None
         try:
             if "response_format" in payload:
-                payload.pop("stream")
+                api_payload = self._prepare_api_payload(payload)
+                api_payload.pop("stream")
                 try:
                     raw_response = await self.root_async_client.chat.completions.with_raw_response.parse(  # noqa: E501
-                        **payload
+                        **api_payload
                     )
                     response = raw_response.parse()
                 except openai.BadRequestError as e:
                     _handle_openai_bad_request(e)
             elif self._use_responses_api(payload):
+                api_payload = self._prepare_api_payload(payload)
                 original_schema_obj = kwargs.get("response_format")
                 if original_schema_obj and _is_pydantic_class(original_schema_obj):
                     raw_response = (
                         await self.root_async_client.responses.with_raw_response.parse(
-                            **payload
+                            **api_payload
                         )
                     )
                 else:
                     raw_response = (
                         await self.root_async_client.responses.with_raw_response.create(
-                            **payload
+                            **api_payload
                         )
                     )
                 response = raw_response.parse()
@@ -1456,8 +1476,9 @@ async def _agenerate(
                     output_version=self.output_version,
                 )
             else:
+                api_payload = self._prepare_api_payload(payload)
                 raw_response = await self.async_client.with_raw_response.create(
-                    **payload
+                    **api_payload
                 )
                 response = raw_response.parse()
         except Exception as e:

From e1add11abe7443e3b3c2b2960f9383612f0aa430 Mon Sep 17 00:00:00 2001
From: Mason Daugherty
Date: Tue, 26 Aug 2025 23:40:28 -0400
Subject: [PATCH 72/73] fix: openai

---
 .../langchain_openai/chat_models/base.py | 65 +++++++++++++++++--
 1 file changed, 59 insertions(+), 6 deletions(-)

diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py
index 189f264e5e52b..9ccd53b50283c 100644
--- a/libs/partners/openai/langchain_openai/chat_models/base.py
+++ b/libs/partners/openai/langchain_openai/chat_models/base.py
@@ -967,9 +967,16 @@ def _stream_responses(
         messages: list[BaseMessage],
         stop: Optional[list[str]] = None,
         run_manager: Optional[CallbackManagerForLLMRun] = None,
+        *,
+        output_version: Optional[str] = None,
         **kwargs: Any,
     ) -> Iterator[ChatGenerationChunk]:
         kwargs["stream"] = True
+        effective_output_version = (
+            output_version
+            if output_version is not None
+            else (self.output_version or "v0")
+        )
         payload = self._get_request_payload(messages, stop=stop, **kwargs)
         api_payload = self._prepare_api_payload(payload)
         if self.include_response_headers:
@@ -1004,7 +1011,7 @@ def _stream_responses(
                     schema=original_schema_obj,
                     metadata=metadata,
                     has_reasoning=has_reasoning,
-                    output_version=self.output_version,
+                    output_version=effective_output_version,
                 )
                 if generation_chunk:
                     if run_manager:
@@ -1021,9 +1028,16 @@ async def _astream_responses(
         messages: list[BaseMessage],
         stop: Optional[list[str]] = None,
         run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+        *,
+        output_version: Optional[str] = None,
         **kwargs: Any,
     ) -> AsyncIterator[ChatGenerationChunk]:
         kwargs["stream"] = True
+        effective_output_version = (
+            output_version
+            if output_version is not None
+            else (self.output_version or "v0")
+        )
         payload = self._get_request_payload(messages, stop=stop, **kwargs)
         api_payload = self._prepare_api_payload(payload)
         if self.include_response_headers:
@@ -1062,7 +1076,7 @@ async def _astream_responses(
                     schema=original_schema_obj,
                     metadata=metadata,
                     has_reasoning=has_reasoning,
-                    output_version=self.output_version,
+                    output_version=effective_output_version,
                 )
                 if generation_chunk:
                     if run_manager:
@@ -1100,9 +1114,12 @@ def _stream(
         run_manager: Optional[CallbackManagerForLLMRun] = None,
         *,
         stream_usage: Optional[bool] = None,
+        output_version: Optional[str] = None,
         **kwargs: Any,
     ) -> Iterator[ChatGenerationChunk]:
         kwargs["stream"] = True
+        # Note: output_version parameter accepted for consistency but not used
+        # in Chat Completions API
         stream_usage = self._should_stream_usage(stream_usage, **kwargs)
         if stream_usage:
             kwargs["stream_options"] = {"include_usage": stream_usage}
@@ -1172,11 +1189,23 @@ def _generate(
         messages: list[BaseMessage],
         stop: Optional[list[str]] = None,
         run_manager: Optional[CallbackManagerForLLMRun] = None,
+        *,
+        output_version: Optional[str] = None,
         **kwargs: Any,
     ) -> ChatResult:
+        effective_output_version = (
+            output_version
+            if output_version is not None
+            else (self.output_version or "v0")
+        )
+
         if self.streaming:
             stream_iter = self._stream(
-                messages, stop=stop, run_manager=run_manager, **kwargs
+                messages,
+                stop=stop,
+                run_manager=run_manager,
+                output_version=effective_output_version,
+                **kwargs,
             )
             return generate_from_stream(stream_iter)
         payload = self._get_request_payload(messages, stop=stop, **kwargs)
@@ -1212,7 +1241,7 @@ def _generate(
                     response,
                     schema=original_schema_obj,
                     metadata=generation_info,
-                    output_version=self.output_version,
+                    output_version=effective_output_version,
                 )
             else:
                 api_payload = self._prepare_api_payload(payload)
@@ -1356,9 +1385,12 @@ async def _astream(
         run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
         *,
         stream_usage: Optional[bool] = None,
+        output_version: Optional[str] = None,
         **kwargs: Any,
     ) -> AsyncIterator[ChatGenerationChunk]:
         kwargs["stream"] = True
+        # Note: output_version parameter accepted for consistency but not used
+        # in Chat Completions API
         stream_usage = self._should_stream_usage(stream_usage, **kwargs)
         if stream_usage:
             kwargs["stream_options"] = {"include_usage": stream_usage}
@@ -1430,11 +1462,23 @@ async def _agenerate(
         messages: list[BaseMessage],
         stop: Optional[list[str]] = None,
         run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+        *,
+        output_version: Optional[str] = None,
         **kwargs: Any,
     ) -> ChatResult:
+        effective_output_version = (
+            output_version
+            if output_version is not None
+            else (self.output_version or "v0")
+        )
+
        if self.streaming:
            stream_iter = self._astream(
-                messages, stop=stop, run_manager=run_manager, **kwargs
+                messages,
+                stop=stop,
+                run_manager=run_manager,
+                output_version=effective_output_version,
+                **kwargs,
            )
            return await agenerate_from_stream(stream_iter)
        payload = self._get_request_payload(messages, stop=stop, **kwargs)
@@ -1473,7 +1517,7 @@ async def _agenerate(
                    response,
                    schema=original_schema_obj,
                    metadata=generation_info,
-                    output_version=self.output_version,
+                    output_version=effective_output_version,
                )
            else:
                api_payload = self._prepare_api_payload(payload)
@@ -4078,6 +4122,9 @@ def _construct_lc_result_from_responses_api(
     )
 
     if output_version == "v0":
         message = _convert_to_v03_ai_message(message)
+    elif output_version == "v1":
+        # Use content_blocks property which handles v1 conversion via block_translators
+        message = message.model_copy(update={"content": message.content_blocks})
 
     return ChatResult(generations=[ChatGeneration(message=message)])
@@ -4315,6 +4362,12 @@ def _advance(output_idx: int, sub_idx: Optional[int] = None) -> None:
             AIMessageChunk,
             _convert_to_v03_ai_message(message, has_reasoning=has_reasoning),
         )
+    elif output_version == "v1":
+        # Use content_blocks property which handles v1 conversion via block_translators
+        message = cast(
+            AIMessageChunk,
+            message.model_copy(update={"content": message.content_blocks}),
+        )
 
     return (
         current_index,

From 19e5e96a5f74a9de6e2f8e364ac2a4af61557be0 Mon Sep 17 00:00:00 2001
From: Mason Daugherty
Date: Tue, 26 Aug 2025 23:46:00 -0400
Subject: [PATCH 73/73] fix: anthropic

---
 .../anthropic/langchain_anthropic/chat_models.py | 16 ++++++++++++++++
 .../openai/langchain_openai/chat_models/base.py  |  8 ++++----
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/libs/partners/anthropic/langchain_anthropic/chat_models.py b/libs/partners/anthropic/langchain_anthropic/chat_models.py
index 953f1cf6a3d43..7132344bd4977 100644
--- a/libs/partners/anthropic/langchain_anthropic/chat_models.py
+++ b/libs/partners/anthropic/langchain_anthropic/chat_models.py
@@ -1596,8 +1596,11 @@ def _stream(
         run_manager: Optional[CallbackManagerForLLMRun] = None,
         *,
         stream_usage: Optional[bool] = None,
+        output_version: Optional[str] = None,
         **kwargs: Any,
     ) -> Iterator[ChatGenerationChunk]:
+        # Note: output_version accepted for interface consistency; format conversion
+        # handled by core
         if stream_usage is None:
             stream_usage = self.stream_usage
         kwargs["stream"] = True
@@ -1632,8 +1635,11 @@ async def _astream(
         run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
         *,
         stream_usage: Optional[bool] = None,
+        output_version: Optional[str] = None,
         **kwargs: Any,
     ) -> AsyncIterator[ChatGenerationChunk]:
+        # Note: output_version accepted for interface consistency; format conversion
+        # handled by core
         if stream_usage is None:
             stream_usage = self.stream_usage
         kwargs["stream"] = True
@@ -1715,13 +1721,18 @@ def _generate(
         messages: list[BaseMessage],
         stop: Optional[list[str]] = None,
         run_manager: Optional[CallbackManagerForLLMRun] = None,
+        *,
+        output_version: Optional[str] = None,
         **kwargs: Any,
     ) -> ChatResult:
+        # Note: output_version accepted for interface consistency; format conversion
+        # handled by core
         if self.streaming:
             stream_iter = self._stream(
                 messages,
                 stop=stop,
                 run_manager=run_manager,
+                output_version=output_version,
                 **kwargs,
             )
             return generate_from_stream(stream_iter)
@@ -1737,13 +1748,18 @@ async def _agenerate(
         messages: list[BaseMessage],
         stop: Optional[list[str]] = None,
         run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+        *,
+        output_version: Optional[str] = None,
         **kwargs: Any,
     ) -> ChatResult:
+        # Note: output_version accepted for interface consistency; format conversion
+        # handled by core
         if self.streaming:
             stream_iter = self._astream(
                 messages,
                 stop=stop,
                 run_manager=run_manager,
+                output_version=output_version,
                 **kwargs,
             )
             return await agenerate_from_stream(stream_iter)
diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py
index 9ccd53b50283c..af0b99e5ea1b4 100644
--- a/libs/partners/openai/langchain_openai/chat_models/base.py
+++ b/libs/partners/openai/langchain_openai/chat_models/base.py
@@ -1118,8 +1118,8 @@ def _stream(
         **kwargs: Any,
     ) -> Iterator[ChatGenerationChunk]:
         kwargs["stream"] = True
-        # Note: output_version parameter accepted for consistency but not used
-        # in Chat Completions API
+        # Note: output_version accepted for interface consistency; format conversion
+        # handled by core
         stream_usage = self._should_stream_usage(stream_usage, **kwargs)
         if stream_usage:
             kwargs["stream_options"] = {"include_usage": stream_usage}
@@ -1389,8 +1389,8 @@ async def _astream(
         **kwargs: Any,
     ) -> AsyncIterator[ChatGenerationChunk]:
         kwargs["stream"] = True
-        # Note: output_version parameter accepted for consistency but not used
-        # in Chat Completions API
+        # Note: output_version accepted for interface consistency; format conversion
+        # handled by core
         stream_usage = self._should_stream_usage(stream_usage, **kwargs)
         if stream_usage:
             kwargs["stream_options"] = {"include_usage": stream_usage}
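
Note (illustrative, not part of the patches above): the effective_output_version
expression repeated in _stream_responses, _astream_responses, _generate, and
_agenerate resolves the output format in a fixed order: an explicit per-call
output_version wins, then the model's configured output_version, then the "v0"
default. The sketch below only restates that precedence; the
resolve_output_version helper is hypothetical and does not exist in the patched
modules.

    from typing import Optional

    def resolve_output_version(
        call_override: Optional[str], model_default: Optional[str]
    ) -> str:
        # Mirrors the precedence used in the patched methods:
        # explicit per-call value > model-level default > "v0".
        if call_override is not None:
            return call_override
        return model_default or "v0"

    assert resolve_output_version("v1", "v0") == "v1"  # explicit override wins
    assert resolve_output_version(None, "v1") == "v1"  # model default applies
    assert resolve_output_version(None, None) == "v0"  # fallback to "v0"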