diff --git a/core/llm/llms/OpenRouter.ts b/core/llm/llms/OpenRouter.ts
index b35aaa59c33..634d06136d6 100644
--- a/core/llm/llms/OpenRouter.ts
+++ b/core/llm/llms/OpenRouter.ts
@@ -1,3 +1,5 @@
+import { ChatCompletionCreateParams } from "openai/resources/index";
+
 import { LLMOptions } from "../../index.js";
 import { osModelsEditPrompt } from "../templates/edit.js";
 
@@ -13,6 +15,122 @@ class OpenRouter extends OpenAI {
     },
     useLegacyCompletionsEndpoint: false,
   };
+
+  /**
+   * Detect if the model is an Anthropic/Claude model
+   */
+  private isAnthropicModel(model?: string): boolean {
+    if (!model) return false;
+    const modelLower = model.toLowerCase();
+    return (
+      modelLower.includes("claude") || modelLower.includes("anthropic/claude")
+    );
+  }
+
+  /**
+   * Add cache_control to message content for Anthropic models
+   */
+  private addCacheControlToContent(content: any, addCaching: boolean): any {
+    if (!addCaching) return content;
+
+    if (typeof content === "string") {
+      return [
+        {
+          type: "text",
+          text: content,
+          cache_control: { type: "ephemeral" },
+        },
+      ];
+    }
+
+    if (Array.isArray(content)) {
+      // For array content, add cache_control to the last text item
+      return content.map((part, idx) => {
+        if (part.type === "text" && idx === content.length - 1) {
+          return {
+            ...part,
+            cache_control: { type: "ephemeral" },
+          };
+        }
+        return part;
+      });
+    }
+
+    return content;
+  }
+
+  /**
+   * Override modifyChatBody to add Anthropic caching when appropriate
+   */
+  protected modifyChatBody(
+    body: ChatCompletionCreateParams,
+  ): ChatCompletionCreateParams {
+    // First apply parent modifications
+    body = super.modifyChatBody(body);
+
+    // Check if we should apply Anthropic caching
+    if (!this.isAnthropicModel(body.model) || !this.cacheBehavior) {
+      return body;
+    }
+
+    const shouldCacheConversation = this.cacheBehavior.cacheConversation;
+    const shouldCacheSystemMessage = this.cacheBehavior.cacheSystemMessage;
+
+    if (!shouldCacheConversation && !shouldCacheSystemMessage) {
+      return body;
+    }
+
+    // Follow the same logic as Anthropic.ts: filter out system messages first
+    const filteredMessages = body.messages.filter(
+      (m: any) => m.role !== "system" && !!m.content,
+    );
+
+    // Find the last two user message indices from the filtered array
+    const lastTwoUserMsgIndices = filteredMessages
+      .map((msg: any, index: number) => (msg.role === "user" ? index : -1))
+      .filter((index: number) => index !== -1)
+      .slice(-2);
+
+    // Create a mapping from filtered indices to original indices
+    let filteredIndex = 0;
+    const filteredToOriginalIndexMap: number[] = [];
+    body.messages.forEach((msg: any, originalIndex: number) => {
+      if (msg.role !== "system" && !!msg.content) {
+        filteredToOriginalIndexMap[filteredIndex] = originalIndex;
+        filteredIndex++;
+      }
+    });
+
+    // Modify messages to add cache_control
+    body.messages = body.messages.map((message: any, idx) => {
+      // Handle system message caching
+      if (message.role === "system" && shouldCacheSystemMessage) {
+        return {
+          ...message,
+          content: this.addCacheControlToContent(message.content, true),
+        };
+      }
+
+      // Handle conversation caching for last two user messages
+      // Check if this message's index (in filtered array) is one of the last two user messages
+      const filteredIdx = filteredToOriginalIndexMap.indexOf(idx);
+      if (
+        message.role === "user" &&
+        shouldCacheConversation &&
+        filteredIdx !== -1 &&
+        lastTwoUserMsgIndices.includes(filteredIdx)
+      ) {
+        return {
+          ...message,
+          content: this.addCacheControlToContent(message.content, true),
+        };
+      }
+
+      return message;
+    });
+
+    return body;
+  }
 }
 
 export default OpenRouter;
diff --git a/core/llm/llms/OpenRouter.vitest.ts b/core/llm/llms/OpenRouter.vitest.ts
new file mode 100644
index 00000000000..e6e8afdd216
--- /dev/null
+++ b/core/llm/llms/OpenRouter.vitest.ts
@@ -0,0 +1,236 @@
+import { ChatCompletionCreateParams } from "openai/resources/index";
+import { describe, expect, it } from "vitest";
+
+import OpenRouter from "./OpenRouter";
+
+describe("OpenRouter Anthropic Caching", () => {
+  it("should detect Anthropic models correctly", () => {
+    const openRouter = new OpenRouter({
+      model: "claude-3-5-sonnet-latest",
+      apiKey: "test-key",
+    });
+
+    // Test private method through modifyChatBody
+    const body: ChatCompletionCreateParams = {
+      model: "claude-3-5-sonnet-latest",
+      messages: [],
+    };
+
+    // Should not throw
+    openRouter["modifyChatBody"](body);
+  });
+
+  it("should add cache_control to user messages when caching is enabled", () => {
+    const openRouter = new OpenRouter({
+      model: "anthropic/claude-3.5-sonnet",
+      apiKey: "test-key",
+      cacheBehavior: {
+        cacheConversation: true,
+        cacheSystemMessage: false,
+      },
+    });
+
+    const body: ChatCompletionCreateParams = {
+      model: "anthropic/claude-3.5-sonnet",
+      messages: [
+        { role: "user", content: "First message" },
+        { role: "assistant", content: "Response" },
+        { role: "user", content: "Second message" },
+        { role: "assistant", content: "Another response" },
+        { role: "user", content: "Third message" },
+      ],
+    };
+
+    const modifiedBody = openRouter["modifyChatBody"](body);
+
+    // Check that the last two user messages have cache_control
+    const userMessages = modifiedBody.messages.filter(
+      (msg: any) => msg.role === "user",
+    );
+
+    // First user message should not have cache_control
+    expect(userMessages[0].content).toBe("First message");
+
+    // Last two user messages should have cache_control
+    expect(userMessages[1].content).toEqual([
+      {
+        type: "text",
+        text: "Second message",
+        cache_control: { type: "ephemeral" },
+      },
+    ]);
+
+    expect(userMessages[2].content).toEqual([
+      {
+        type: "text",
+        text: "Third message",
+        cache_control: { type: "ephemeral" },
+      },
+    ]);
+  });
+
+  it("should correctly handle cache_control with system messages present", () => {
+    const openRouter = new OpenRouter({
+      model: "claude-3-5-sonnet-latest",
+      apiKey: "test-key",
+      cacheBehavior: {
+        cacheConversation: true,
+        cacheSystemMessage: true,
+      },
+    });
+
+    const body: ChatCompletionCreateParams = {
+      model: "claude-3-5-sonnet-latest",
+      messages: [
+        { role: "system", content: "You are a helpful assistant" },
+        { role: "user", content: "First user message" },
+        { role: "assistant", content: "First assistant response" },
+        { role: "user", content: "Second user message" },
+        { role: "assistant", content: "Second assistant response" },
+        { role: "user", content: "Third user message" },
+      ],
+    };
+
+    const modifiedBody = openRouter["modifyChatBody"](body);
+
+    // System message should have cache_control
+    expect(modifiedBody.messages[0]).toEqual({
+      role: "system",
+      content: [
+        {
+          type: "text",
+          text: "You are a helpful assistant",
+          cache_control: { type: "ephemeral" },
+        },
+      ],
+    });
+
+    // Check user messages - should follow Anthropic filtering logic
+    const userMessages = modifiedBody.messages.filter(
+      (msg: any) => msg.role === "user",
+    );
+
+    // First user message should NOT have cache_control (only last 2)
+    expect(userMessages[0].content).toBe("First user message");
+
+    // Last two user messages should have cache_control
+    expect(userMessages[1].content).toEqual([
+      {
+        type: "text",
+        text: "Second user message",
+        cache_control: { type: "ephemeral" },
+      },
+    ]);
+
+    expect(userMessages[2].content).toEqual([
+      {
+        type: "text",
+        text: "Third user message",
+        cache_control: { type: "ephemeral" },
+      },
+    ]);
+
+    // Assistant messages should remain unchanged
+    expect(modifiedBody.messages[2].content).toBe("First assistant response");
+    expect(modifiedBody.messages[4].content).toBe("Second assistant response");
+  });
+
+  it("should add cache_control to system message when caching is enabled", () => {
+    const openRouter = new OpenRouter({
+      model: "claude-3-5-sonnet-latest",
+      apiKey: "test-key",
+      cacheBehavior: {
+        cacheConversation: false,
+        cacheSystemMessage: true,
+      },
+    });
+
+    const body: ChatCompletionCreateParams = {
+      model: "claude-3-5-sonnet-latest",
+      messages: [
+        { role: "system", content: "You are a helpful assistant" },
+        { role: "user", content: "Hello" },
+      ],
+    };
+
+    const modifiedBody = openRouter["modifyChatBody"](body);
+
+    // System message should have cache_control
+    expect(modifiedBody.messages[0]).toEqual({
+      role: "system",
+      content: [
+        {
+          type: "text",
+          text: "You are a helpful assistant",
+          cache_control: { type: "ephemeral" },
+        },
+      ],
+    });
+
+    // User message should remain unchanged
+    expect(modifiedBody.messages[1]).toEqual({
+      role: "user",
+      content: "Hello",
+    });
+  });
+
+  it("should handle array content correctly", () => {
+    const openRouter = new OpenRouter({
+      model: "claude-3-5-sonnet-latest",
+      apiKey: "test-key",
+      cacheBehavior: {
+        cacheConversation: true,
+        cacheSystemMessage: false,
+      },
+    });
+
+    const body: ChatCompletionCreateParams = {
+      model: "claude-3-5-sonnet-latest",
+      messages: [
+        {
+          role: "user",
+          content: [
+            { type: "text", text: "First part" },
+            { type: "text", text: "Second part" },
+          ],
+        },
+      ],
+    };
+
+    const modifiedBody = openRouter["modifyChatBody"](body);
+
+    // Only the last text part should have cache_control
+    expect(modifiedBody.messages[0].content).toEqual([
+      { type: "text", text: "First part" },
+      {
+        type: "text",
+        text: "Second part",
+        cache_control: { type: "ephemeral" },
+      },
+    ]);
+  });
+
+  it("should not modify messages for non-Anthropic models", () => {
+    const openRouter = new OpenRouter({
+      model: "gpt-4o",
+      apiKey: "test-key",
+      cacheBehavior: {
+        cacheConversation: true,
+        cacheSystemMessage: true,
+      },
+    });
+
+    const body: ChatCompletionCreateParams = {
+      model: "gpt-4o",
+      messages: [
+        { role: "system", content: "System message" },
+        { role: "user", content: "User message" },
+      ],
+    };
+
+    const modifiedBody = openRouter["modifyChatBody"](body);
+
+    // Messages should remain unchanged
+    expect(modifiedBody.messages).toEqual(body.messages);
+  });
+});
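
For reference, a minimal usage sketch (not part of the diff) showing how the new caching path is switched on. It mirrors the constructor options exercised by the tests above; the apiKey value is a placeholder.

import OpenRouter from "./OpenRouter";

// Either flag alone is enough to trigger the cache_control rewriting in
// modifyChatBody for Claude models; both are shown here.
const openRouter = new OpenRouter({
  model: "anthropic/claude-3.5-sonnet",
  apiKey: "YOUR_OPENROUTER_KEY", // placeholder, not a real key
  cacheBehavior: {
    cacheSystemMessage: true, // wraps the system prompt in a cached text block
    cacheConversation: true, // caches the last two user messages
  },
});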