Add Anthropic caching support to OpenRouter LLM implementation #7492
Open
ferenci84 wants to merge 3 commits into continuedev:main from ferenci84:feature/openrouter-anthropic-caching
+354 −0
OpenRouter.ts

@@ -1,3 +1,5 @@

```typescript
import { ChatCompletionCreateParams } from "openai/resources/index";

import { LLMOptions } from "../../index.js";
import { osModelsEditPrompt } from "../templates/edit.js";
```
@@ -13,6 +15,122 @@ class OpenRouter extends OpenAI {

```typescript
    },
    useLegacyCompletionsEndpoint: false,
  };

  /**
   * Detect if the model is an Anthropic/Claude model
   */
  private isAnthropicModel(model?: string): boolean {
    if (!model) return false;
    const modelLower = model.toLowerCase();
    return (
      modelLower.includes("claude") || modelLower.includes("anthropic/claude")
    );
  }

  /**
   * Add cache_control to message content for Anthropic models
   */
  private addCacheControlToContent(content: any, addCaching: boolean): any {
    if (!addCaching) return content;

    if (typeof content === "string") {
      return [
        {
          type: "text",
          text: content,
          cache_control: { type: "ephemeral" },
        },
      ];
    }

    if (Array.isArray(content)) {
      // For array content, add cache_control to the last text item
      return content.map((part, idx) => {
        if (part.type === "text" && idx === content.length - 1) {
          return {
            ...part,
            cache_control: { type: "ephemeral" },
          };
        }
        return part;
      });
    }

    return content;
  }

  /**
   * Override modifyChatBody to add Anthropic caching when appropriate
   */
  protected modifyChatBody(
    body: ChatCompletionCreateParams,
  ): ChatCompletionCreateParams {
    // First apply parent modifications
    body = super.modifyChatBody(body);

    // Check if we should apply Anthropic caching
    if (!this.isAnthropicModel(body.model) || !this.cacheBehavior) {
      return body;
    }

    const shouldCacheConversation = this.cacheBehavior.cacheConversation;
    const shouldCacheSystemMessage = this.cacheBehavior.cacheSystemMessage;
```
Review comment: Also handle the deprecated but relevant completion option: `completionOptions.promptCaching`.
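A sketch of how that suggestion could be honored, assuming `completionOptions.promptCaching` is a boolean and that it should enable both caching modes (neither detail is confirmed by this diff):

```typescript
// Hypothetical helper, not part of the PR: derive an effective cache
// behavior, falling back to the deprecated promptCaching flag when the
// newer cacheBehavior option is absent. The shapes of both options are
// assumptions based on the reviewer's comment.
interface CacheBehavior {
  cacheConversation: boolean;
  cacheSystemMessage: boolean;
}

function resolveCacheBehavior(
  cacheBehavior?: CacheBehavior,
  promptCaching?: boolean,
): CacheBehavior | undefined {
  if (cacheBehavior) return cacheBehavior;
  return promptCaching
    ? { cacheConversation: true, cacheSystemMessage: true }
    : undefined;
}
```

`modifyChatBody` could then consult `resolveCacheBehavior(this.cacheBehavior, this.completionOptions?.promptCaching)` instead of reading `this.cacheBehavior` directly.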
The method then continues:

```typescript
    if (!shouldCacheConversation && !shouldCacheSystemMessage) {
      return body;
    }

    // Follow the same logic as Anthropic.ts: filter out system messages first
    const filteredMessages = body.messages.filter(
      (m: any) => m.role !== "system" && !!m.content,
    );

    // Find the last two user message indices from the filtered array
    const lastTwoUserMsgIndices = filteredMessages
      .map((msg: any, index: number) => (msg.role === "user" ? index : -1))
      .filter((index: number) => index !== -1)
      .slice(-2);

    // Create a mapping from filtered indices to original indices
    let filteredIndex = 0;
    const filteredToOriginalIndexMap: number[] = [];
    body.messages.forEach((msg: any, originalIndex: number) => {
      if (msg.role !== "system" && !!msg.content) {
        filteredToOriginalIndexMap[filteredIndex] = originalIndex;
        filteredIndex++;
      }
    });

    // Modify messages to add cache_control
    body.messages = body.messages.map((message: any, idx) => {
      // Handle system message caching
      if (message.role === "system" && shouldCacheSystemMessage) {
        return {
          ...message,
          content: this.addCacheControlToContent(message.content, true),
        };
      }

      // Handle conversation caching for last two user messages:
      // check if this message's index (in the filtered array) is one
      // of the last two user messages
      const filteredIdx = filteredToOriginalIndexMap.indexOf(idx);
      if (
        message.role === "user" &&
        shouldCacheConversation &&
        filteredIdx !== -1 &&
        lastTwoUserMsgIndices.includes(filteredIdx)
      ) {
        return {
          ...message,
          content: this.addCacheControlToContent(message.content, true),
        };
      }

      return message;
    });

    return body;
  }
}

export default OpenRouter;
```
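To make the transformation concrete, here is an illustrative before/after pair (mirroring the tests below; the model name and message text are made up):

```typescript
// With cacheBehavior { cacheConversation: true, cacheSystemMessage: true },
// a request body like this...
const before = {
  model: "anthropic/claude-3.5-sonnet",
  messages: [
    { role: "system", content: "You are a helpful assistant" },
    { role: "user", content: "Hello" },
  ],
};

// ...leaves modifyChatBody with cache_control breakpoints attached:
const after = {
  model: "anthropic/claude-3.5-sonnet",
  messages: [
    {
      role: "system",
      content: [
        {
          type: "text",
          text: "You are a helpful assistant",
          cache_control: { type: "ephemeral" },
        },
      ],
    },
    {
      role: "user",
      content: [
        { type: "text", text: "Hello", cache_control: { type: "ephemeral" } },
      ],
    },
  ],
};
```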
OpenRouter.test.ts

@@ -0,0 +1,236 @@

```typescript
import { ChatCompletionCreateParams } from "openai/resources/index";
import { describe, expect, it } from "vitest";

import OpenRouter from "./OpenRouter";

describe("OpenRouter Anthropic Caching", () => {
  it("should detect Anthropic models correctly", () => {
    const openRouter = new OpenRouter({
      model: "claude-3-5-sonnet-latest",
      apiKey: "test-key",
    });

    // Test private method through modifyChatBody
    const body: ChatCompletionCreateParams = {
      model: "claude-3-5-sonnet-latest",
      messages: [],
    };

    // Should not throw
    openRouter["modifyChatBody"](body);
  });

  it("should add cache_control to user messages when caching is enabled", () => {
    const openRouter = new OpenRouter({
      model: "anthropic/claude-3.5-sonnet",
      apiKey: "test-key",
      cacheBehavior: {
        cacheConversation: true,
        cacheSystemMessage: false,
      },
    });

    const body: ChatCompletionCreateParams = {
      model: "anthropic/claude-3.5-sonnet",
      messages: [
        { role: "user", content: "First message" },
        { role: "assistant", content: "Response" },
        { role: "user", content: "Second message" },
        { role: "assistant", content: "Another response" },
        { role: "user", content: "Third message" },
      ],
    };

    const modifiedBody = openRouter["modifyChatBody"](body);

    // Check that the last two user messages have cache_control
    const userMessages = modifiedBody.messages.filter(
      (msg: any) => msg.role === "user",
    );

    // First user message should not have cache_control
    expect(userMessages[0].content).toBe("First message");

    // Last two user messages should have cache_control
    expect(userMessages[1].content).toEqual([
      {
        type: "text",
        text: "Second message",
        cache_control: { type: "ephemeral" },
      },
    ]);

    expect(userMessages[2].content).toEqual([
      {
        type: "text",
        text: "Third message",
        cache_control: { type: "ephemeral" },
      },
    ]);
  });

  it("should correctly handle cache_control with system messages present", () => {
    const openRouter = new OpenRouter({
      model: "claude-3-5-sonnet-latest",
      apiKey: "test-key",
      cacheBehavior: {
        cacheConversation: true,
        cacheSystemMessage: true,
      },
    });

    const body: ChatCompletionCreateParams = {
      model: "claude-3-5-sonnet-latest",
      messages: [
        { role: "system", content: "You are a helpful assistant" },
        { role: "user", content: "First user message" },
        { role: "assistant", content: "First assistant response" },
        { role: "user", content: "Second user message" },
        { role: "assistant", content: "Second assistant response" },
        { role: "user", content: "Third user message" },
      ],
    };

    const modifiedBody = openRouter["modifyChatBody"](body);

    // System message should have cache_control
    expect(modifiedBody.messages[0]).toEqual({
      role: "system",
      content: [
        {
          type: "text",
          text: "You are a helpful assistant",
          cache_control: { type: "ephemeral" },
        },
      ],
    });

    // Check user messages - should follow Anthropic filtering logic
    const userMessages = modifiedBody.messages.filter(
      (msg: any) => msg.role === "user",
    );

    // First user message should NOT have cache_control (only last 2)
    expect(userMessages[0].content).toBe("First user message");

    // Last two user messages should have cache_control
    expect(userMessages[1].content).toEqual([
      {
        type: "text",
        text: "Second user message",
        cache_control: { type: "ephemeral" },
      },
    ]);

    expect(userMessages[2].content).toEqual([
      {
        type: "text",
        text: "Third user message",
        cache_control: { type: "ephemeral" },
      },
    ]);

    // Assistant messages should remain unchanged
    expect(modifiedBody.messages[2].content).toBe("First assistant response");
    expect(modifiedBody.messages[4].content).toBe("Second assistant response");
  });

  it("should add cache_control to system message when caching is enabled", () => {
    const openRouter = new OpenRouter({
      model: "claude-3-5-sonnet-latest",
      apiKey: "test-key",
      cacheBehavior: {
        cacheConversation: false,
        cacheSystemMessage: true,
      },
    });

    const body: ChatCompletionCreateParams = {
      model: "claude-3-5-sonnet-latest",
      messages: [
        { role: "system", content: "You are a helpful assistant" },
        { role: "user", content: "Hello" },
      ],
    };

    const modifiedBody = openRouter["modifyChatBody"](body);

    // System message should have cache_control
    expect(modifiedBody.messages[0]).toEqual({
      role: "system",
      content: [
        {
          type: "text",
          text: "You are a helpful assistant",
          cache_control: { type: "ephemeral" },
        },
      ],
    });

    // User message should remain unchanged
    expect(modifiedBody.messages[1]).toEqual({
      role: "user",
      content: "Hello",
    });
  });

  it("should handle array content correctly", () => {
    const openRouter = new OpenRouter({
      model: "claude-3-5-sonnet-latest",
      apiKey: "test-key",
      cacheBehavior: {
        cacheConversation: true,
        cacheSystemMessage: false,
      },
    });

    const body: ChatCompletionCreateParams = {
      model: "claude-3-5-sonnet-latest",
      messages: [
        {
          role: "user",
          content: [
            { type: "text", text: "First part" },
            { type: "text", text: "Second part" },
          ],
        },
      ],
    };

    const modifiedBody = openRouter["modifyChatBody"](body);

    // Only the last text part should have cache_control
    expect(modifiedBody.messages[0].content).toEqual([
      { type: "text", text: "First part" },
      {
        type: "text",
        text: "Second part",
        cache_control: { type: "ephemeral" },
      },
    ]);
  });

  it("should not modify messages for non-Anthropic models", () => {
    const openRouter = new OpenRouter({
      model: "gpt-4o",
      apiKey: "test-key",
      cacheBehavior: {
        cacheConversation: true,
        cacheSystemMessage: true,
      },
    });

    const body: ChatCompletionCreateParams = {
      model: "gpt-4o",
      messages: [
        { role: "system", content: "System message" },
        { role: "user", content: "User message" },
      ],
    };

    const modifiedBody = openRouter["modifyChatBody"](body);

    // Messages should remain unchanged
    expect(modifiedBody.messages).toEqual(body.messages);
  });
});
```
Review comment (on the `isAnthropicModel` check): This is redundant, since any string that includes "anthropic/claude" will always include "claude", so the first condition alone suffices.
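The simplification the comment implies would collapse the check to a single condition (a sketch, not part of the PR):

```typescript
// The "anthropic/claude" test is subsumed by the "claude" test,
// so one includes() check is enough.
function isAnthropicModel(model?: string): boolean {
  return !!model && model.toLowerCase().includes("claude");
}
```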