118 changes: 118 additions & 0 deletions core/llm/llms/OpenRouter.ts
@@ -1,3 +1,5 @@
import { ChatCompletionCreateParams } from "openai/resources/index";

import { LLMOptions } from "../../index.js";
import { osModelsEditPrompt } from "../templates/edit.js";

@@ -13,6 +15,122 @@ class OpenRouter extends OpenAI {
},
useLegacyCompletionsEndpoint: false,
};

/**
* Detect if the model is an Anthropic/Claude model
*/
private isAnthropicModel(model?: string): boolean {
if (!model) return false;
const modelLower = model.toLowerCase();
return (
modelLower.includes("claude") || modelLower.includes("anthropic/claude")
Review comment (Collaborator): This is redundant, since a string that includes "anthropic/claude" will always include "claude".

);
}
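  // Illustration of the review comment above (not part of the diff): since
  // "anthropic/claude-3.5-sonnet".toLowerCase().includes("claude") is already
  // true, the second includes() check can never change the result.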

/**
* Add cache_control to message content for Anthropic models
*/
private addCacheControlToContent(content: any, addCaching: boolean): any {
if (!addCaching) return content;

if (typeof content === "string") {
return [
{
type: "text",
text: content,
cache_control: { type: "ephemeral" },
},
];
}

if (Array.isArray(content)) {
// For array content, add cache_control to the last text item
return content.map((part, idx) => {
if (part.type === "text" && idx === content.length - 1) {
return {
...part,
cache_control: { type: "ephemeral" },
};
}
return part;
});
}

return content;
}
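  // Illustration (not part of the diff) of how this transforms string content:
  //   addCacheControlToContent("hi", true)
  //   -> [{ type: "text", text: "hi", cache_control: { type: "ephemeral" } }]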

/**
* Override modifyChatBody to add Anthropic caching when appropriate
*/
protected modifyChatBody(
body: ChatCompletionCreateParams,
): ChatCompletionCreateParams {
// First apply parent modifications
body = super.modifyChatBody(body);

// Check if we should apply Anthropic caching
if (!this.isAnthropicModel(body.model) || !this.cacheBehavior) {
return body;
}

const shouldCacheConversation = this.cacheBehavior.cacheConversation;
const shouldCacheSystemMessage = this.cacheBehavior.cacheSystemMessage;
Review comment (Collaborator): Also handle the deprecated but still relevant completion option completionOptions.promptCaching. See the relevant hotfix in #7652.
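    // Sketch only, not part of this PR: one way to also honor the deprecated
    // option named in the review comment above, assuming it is still populated
    // on this.completionOptions (field name taken from the comment, not this diff):
    //   const legacyPromptCaching = this.completionOptions?.promptCaching;
    //   const shouldCacheConversation =
    //     this.cacheBehavior?.cacheConversation ?? legacyPromptCaching;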


if (!shouldCacheConversation && !shouldCacheSystemMessage) {
return body;
}

// Follow the same logic as Anthropic.ts: filter out system messages first
const filteredMessages = body.messages.filter(
(m: any) => m.role !== "system" && !!m.content,
);

// Find the last two user message indices from the filtered array
const lastTwoUserMsgIndices = filteredMessages
.map((msg: any, index: number) => (msg.role === "user" ? index : -1))
.filter((index: number) => index !== -1)
.slice(-2);

// Create a mapping from filtered indices to original indices
let filteredIndex = 0;
const filteredToOriginalIndexMap: number[] = [];
body.messages.forEach((msg: any, originalIndex: number) => {
if (msg.role !== "system" && !!msg.content) {
filteredToOriginalIndexMap[filteredIndex] = originalIndex;
filteredIndex++;
}
});

// Modify messages to add cache_control
body.messages = body.messages.map((message: any, idx) => {
// Handle system message caching
if (message.role === "system" && shouldCacheSystemMessage) {
return {
...message,
content: this.addCacheControlToContent(message.content, true),
};
}

// Handle conversation caching for last two user messages
// Check if this message's index (in filtered array) is one of the last two user messages
const filteredIdx = filteredToOriginalIndexMap.indexOf(idx);
if (
message.role === "user" &&
shouldCacheConversation &&
filteredIdx !== -1 &&
lastTwoUserMsgIndices.includes(filteredIdx)
) {
return {
...message,
content: this.addCacheControlToContent(message.content, true),
};
}

return message;
});

return body;
}
}

export default OpenRouter;
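
For reference, enabling this behavior from a caller might look like the following sketch (the constructor options mirror those exercised in the tests below; the apiKey value is a placeholder):

const llm = new OpenRouter({
  model: "anthropic/claude-3.5-sonnet",
  apiKey: "YOUR_OPENROUTER_API_KEY",
  cacheBehavior: {
    cacheConversation: true,
    cacheSystemMessage: true,
  },
});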
236 changes: 236 additions & 0 deletions core/llm/llms/OpenRouter.vitest.ts
@@ -0,0 +1,236 @@
import { ChatCompletionCreateParams } from "openai/resources/index";
import { describe, expect, it } from "vitest";

import OpenRouter from "./OpenRouter";

describe("OpenRouter Anthropic Caching", () => {
it("should detect Anthropic models correctly", () => {
const openRouter = new OpenRouter({
model: "claude-3-5-sonnet-latest",
apiKey: "test-key",
});

// Test private method through modifyChatBody
const body: ChatCompletionCreateParams = {
model: "claude-3-5-sonnet-latest",
messages: [],
};

// Should not throw
openRouter["modifyChatBody"](body);
});

it("should add cache_control to user messages when caching is enabled", () => {
const openRouter = new OpenRouter({
model: "anthropic/claude-3.5-sonnet",
apiKey: "test-key",
cacheBehavior: {
cacheConversation: true,
cacheSystemMessage: false,
},
});

const body: ChatCompletionCreateParams = {
model: "anthropic/claude-3.5-sonnet",
messages: [
{ role: "user", content: "First message" },
{ role: "assistant", content: "Response" },
{ role: "user", content: "Second message" },
{ role: "assistant", content: "Another response" },
{ role: "user", content: "Third message" },
],
};

const modifiedBody = openRouter["modifyChatBody"](body);

// Check that the last two user messages have cache_control
const userMessages = modifiedBody.messages.filter(
(msg: any) => msg.role === "user",
);

// First user message should not have cache_control
expect(userMessages[0].content).toBe("First message");

// Last two user messages should have cache_control
expect(userMessages[1].content).toEqual([
{
type: "text",
text: "Second message",
cache_control: { type: "ephemeral" },
},
]);

expect(userMessages[2].content).toEqual([
{
type: "text",
text: "Third message",
cache_control: { type: "ephemeral" },
},
]);
});

it("should correctly handle cache_control with system messages present", () => {
const openRouter = new OpenRouter({
model: "claude-3-5-sonnet-latest",
apiKey: "test-key",
cacheBehavior: {
cacheConversation: true,
cacheSystemMessage: true,
},
});

const body: ChatCompletionCreateParams = {
model: "claude-3-5-sonnet-latest",
messages: [
{ role: "system", content: "You are a helpful assistant" },
{ role: "user", content: "First user message" },
{ role: "assistant", content: "First assistant response" },
{ role: "user", content: "Second user message" },
{ role: "assistant", content: "Second assistant response" },
{ role: "user", content: "Third user message" },
],
};

const modifiedBody = openRouter["modifyChatBody"](body);

// System message should have cache_control
expect(modifiedBody.messages[0]).toEqual({
role: "system",
content: [
{
type: "text",
text: "You are a helpful assistant",
cache_control: { type: "ephemeral" },
},
],
});

// Check user messages - should follow Anthropic filtering logic
const userMessages = modifiedBody.messages.filter(
(msg: any) => msg.role === "user",
);

// First user message should NOT have cache_control (only last 2)
expect(userMessages[0].content).toBe("First user message");

// Last two user messages should have cache_control
expect(userMessages[1].content).toEqual([
{
type: "text",
text: "Second user message",
cache_control: { type: "ephemeral" },
},
]);

expect(userMessages[2].content).toEqual([
{
type: "text",
text: "Third user message",
cache_control: { type: "ephemeral" },
},
]);

// Assistant messages should remain unchanged
expect(modifiedBody.messages[2].content).toBe("First assistant response");
expect(modifiedBody.messages[4].content).toBe("Second assistant response");
});

it("should add cache_control to system message when caching is enabled", () => {
const openRouter = new OpenRouter({
model: "claude-3-5-sonnet-latest",
apiKey: "test-key",
cacheBehavior: {
cacheConversation: false,
cacheSystemMessage: true,
},
});

const body: ChatCompletionCreateParams = {
model: "claude-3-5-sonnet-latest",
messages: [
{ role: "system", content: "You are a helpful assistant" },
{ role: "user", content: "Hello" },
],
};

const modifiedBody = openRouter["modifyChatBody"](body);

// System message should have cache_control
expect(modifiedBody.messages[0]).toEqual({
role: "system",
content: [
{
type: "text",
text: "You are a helpful assistant",
cache_control: { type: "ephemeral" },
},
],
});

// User message should remain unchanged
expect(modifiedBody.messages[1]).toEqual({
role: "user",
content: "Hello",
});
});

it("should handle array content correctly", () => {
const openRouter = new OpenRouter({
model: "claude-3-5-sonnet-latest",
apiKey: "test-key",
cacheBehavior: {
cacheConversation: true,
cacheSystemMessage: false,
},
});

const body: ChatCompletionCreateParams = {
model: "claude-3-5-sonnet-latest",
messages: [
{
role: "user",
content: [
{ type: "text", text: "First part" },
{ type: "text", text: "Second part" },
],
},
],
};

const modifiedBody = openRouter["modifyChatBody"](body);

// Only the last text part should have cache_control
expect(modifiedBody.messages[0].content).toEqual([
{ type: "text", text: "First part" },
{
type: "text",
text: "Second part",
cache_control: { type: "ephemeral" },
},
]);
});

it("should not modify messages for non-Anthropic models", () => {
const openRouter = new OpenRouter({
model: "gpt-4o",
apiKey: "test-key",
cacheBehavior: {
cacheConversation: true,
cacheSystemMessage: true,
},
});

const body: ChatCompletionCreateParams = {
model: "gpt-4o",
messages: [
{ role: "system", content: "System message" },
{ role: "user", content: "User message" },
],
};

const modifiedBody = openRouter["modifyChatBody"](body);

// Messages should remain unchanged
expect(modifiedBody.messages).toEqual(body.messages);
});
});
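
These tests should run under the repository's standard vitest setup, e.g. (assuming vitest is available as a dev dependency):

npx vitest run core/llm/llms/OpenRouter.vitest.ts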