
Commit d78fc99

feat: add Mercury Apply endpoint support to Inception LLM (#7827)
* feat: add Mercury Apply endpoint support to Inception LLM
* refactor: begin to clean up the apply / streamDiff code
* fix: undo commenting out of instant apply
* fix: use underlyingProviderName
1 parent 02465b6 commit d78fc99

File tree

19 files changed (+337, -164 lines)


core/core.ts

Lines changed: 6 additions & 13 deletions
@@ -17,8 +17,8 @@ import { DataLogger } from "./data/log";
 import { CodebaseIndexer } from "./indexing/CodebaseIndexer";
 import DocsService from "./indexing/docs/DocsService";
 import { countTokens } from "./llm/countTokens";
-import Ollama from "./llm/llms/Ollama";
 import Lemonade from "./llm/llms/Lemonade";
+import Ollama from "./llm/llms/Ollama";
 import { EditAggregator } from "./nextEdit/context/aggregateEdits";
 import { createNewPromptFileV2 } from "./promptFiles/createNewPromptFile";
 import { callTool } from "./tools/callTool";
@@ -729,20 +729,13 @@ export class Core {
        data.fileUri ?? "current-file-stream",
      ); // not super important since currently cancelling apply will cancel all streams it's one file at a time

-      return streamDiffLines({
-        highlighted: data.highlighted,
-        prefix: data.prefix,
-        suffix: data.suffix,
+      return streamDiffLines(
+        data,
        llm,
-        // rules included for edit, NOT apply
-        rulesToInclude: data.includeRulesInSystemMessage
-          ? config.rules
-          : undefined,
-        input: data.input,
-        language: data.language,
-        overridePrompt: undefined,
        abortController,
-      });
+        undefined,
+        data.includeRulesInSystemMessage ? config.rules : undefined,
+      );
    });

    on("cancelApply", async (msg) => {

core/edit/constants.ts

Lines changed: 2 additions & 0 deletions
@@ -1 +1,3 @@
 export const EDIT_MODE_STREAM_ID = "edit-mode";
+
+export const APPLY_UNIQUE_TOKEN = "<|!@#IS_APPLY!@#|>";

core/edit/recursiveStream.ts

Lines changed: 51 additions & 3 deletions
@@ -1,7 +1,14 @@
-import { ChatMessage, ILLM, Prediction, PromptLog } from "..";
+import {
+  ChatMessage,
+  ILLM,
+  Prediction,
+  PromptLog,
+  type StreamDiffLinesType,
+} from "..";
 import { DEFAULT_MAX_TOKENS } from "../llm/constants";
 import { countTokens } from "../llm/countTokens";
 import { renderChatMessage } from "../util/messageContent";
+import { APPLY_UNIQUE_TOKEN } from "./constants.js";

 const INFINITE_STREAM_SAFETY = 0.9;

@@ -17,6 +24,7 @@ const RECURSIVE_PROMPT = `Continue EXACTLY where you left`;
 export async function* recursiveStream(
   llm: ILLM,
   abortController: AbortController,
+  type: StreamDiffLinesType,
   prompt: ChatMessage[] | string,
   prediction: Prediction | undefined,
   currentBuffer = "",
@@ -28,8 +36,11 @@
   let buffer = currentBuffer;
   // let whiteSpaceAtEndOfBuffer = buffer.match(/\s*$/)?.[0] ?? ""; // attempts at fixing whitespace bug with recursive boundaries

+  const injectApplyToken = type === "apply" && shouldInjectApplyToken(llm);
   if (typeof prompt === "string") {
-    const generator = llm.streamComplete(prompt, abortController.signal, {
+    const finalPrompt = injectApplyToken ? prompt + APPLY_UNIQUE_TOKEN : prompt;
+
+    const generator = llm.streamComplete(finalPrompt, abortController.signal, {
       raw: true,
       prediction: undefined,
       reasoning: false,
@@ -62,7 +73,11 @@
       }
     }
   } else {
-    const generator = llm.streamChat(prompt, abortController.signal, {
+    const promptMessages = injectApplyToken
+      ? appendTokenToLastMessage(prompt, APPLY_UNIQUE_TOKEN)
+      : prompt;
+
+    const generator = llm.streamChat(promptMessages, abortController.signal, {
       raw: true,
       prediction: undefined,
       reasoning: false,
@@ -104,3 +119,36 @@
     }
   }
 }
+
+function shouldInjectApplyToken(llm: ILLM): boolean {
+  const model = llm.model?.toLowerCase() ?? "";
+  return (
+    llm.underlyingProviderName === "inception" && model.includes("mercury")
+  );
+}
+
+function appendTokenToLastMessage(
+  messages: ChatMessage[],
+  token: string,
+): ChatMessage[] {
+  if (messages.length === 0) {
+    return messages;
+  }
+
+  const lastMessage = messages[messages.length - 1];
+  if (typeof lastMessage.content !== "string") {
+    return messages;
+  }
+
+  if (lastMessage.content.endsWith(token)) {
+    return messages;
+  }
+
+  const updatedMessages = [...messages];
+  updatedMessages[updatedMessages.length - 1] = {
+    ...lastMessage,
+    content: lastMessage.content + token,
+  };
+
+  return updatedMessages;
+}
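
Taken on their own, the two helpers added here implement a simple tagging scheme: when the stream is an apply request and the underlying provider is Inception serving a Mercury model, the sentinel token is appended to the prompt (or to the last chat message) so it can be recognized and stripped again further down the stack. A self-contained sketch of that round trip follows, with ChatMessage reduced to a local stand-in type; the helper names here are not the repository's.

// Standalone sketch of the apply-token round trip; ChatMessage is a minimal
// stand-in for the real interface in core/index.d.ts.
type ChatMessage = { role: "user" | "assistant" | "system"; content: string };

const APPLY_UNIQUE_TOKEN = "<|!@#IS_APPLY!@#|>";

// Mirrors appendTokenToLastMessage: tag the last message without mutating the input.
function appendToken(messages: ChatMessage[], token: string): ChatMessage[] {
  if (messages.length === 0) return messages;
  const last = messages[messages.length - 1];
  if (last.content.endsWith(token)) return messages;
  return [...messages.slice(0, -1), { ...last, content: last.content + token }];
}

// Mirrors Inception.removeToken: strip the tag before the request goes upstream.
function stripToken(messages: ChatMessage[], token: string): ChatMessage[] {
  const last = messages[messages.length - 1];
  if (!last?.content.endsWith(token)) return messages;
  return [
    ...messages.slice(0, -1),
    { ...last, content: last.content.slice(0, -token.length) },
  ];
}

const tagged = appendToken(
  [{ role: "user", content: "apply this edit" }],
  APPLY_UNIQUE_TOKEN,
);
console.log(tagged[0].content.endsWith(APPLY_UNIQUE_TOKEN)); // true
console.log(stripToken(tagged, APPLY_UNIQUE_TOKEN)[0].content); // "apply this edit"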

core/edit/streamDiffLines.ts

Lines changed: 35 additions & 23 deletions
@@ -4,6 +4,7 @@ import {
   ILLM,
   Prediction,
   RuleWithSource,
+  StreamDiffLinesPayload,
   ToolResultChatMessage,
   UserChatMessage,
 } from "../";
@@ -20,6 +21,7 @@ import { streamDiff } from "../diff/streamDiff";
 import { streamLines } from "../diff/util";
 import { getSystemMessageWithRules } from "../llm/rules/getSystemMessageWithRules";
 import { gptEditPrompt } from "../llm/templates/edit";
+import { defaultApplyPrompt } from "../llm/templates/edit/gpt";
 import { findLast } from "../util/findLast";
 import { Telemetry } from "../util/posthog";
 import { recursiveStream } from "./recursiveStream";
@@ -42,6 +44,20 @@ function constructEditPrompt(
   });
 }

+function constructApplyPrompt(
+  originalCode: string,
+  newCode: string,
+  llm: ILLM,
+) {
+  const template = llm.promptTemplates?.apply ?? defaultApplyPrompt;
+  const rendered = llm.renderPromptTemplate(template, [], {
+    original_code: originalCode,
+    new_code: newCode,
+  });
+
+  return rendered;
+}
+
 export async function* addIndentation(
   diffLineGenerator: AsyncGenerator<DiffLine>,
   indentation: string,
@@ -58,27 +74,15 @@ function modelIsInept(model: string): boolean {
   return !(model.includes("gpt") || model.includes("claude"));
 }

-export async function* streamDiffLines({
-  prefix,
-  highlighted,
-  suffix,
-  llm,
-  abortController,
-  input,
-  language,
-  overridePrompt,
-  rulesToInclude,
-}: {
-  prefix: string;
-  highlighted: string;
-  suffix: string;
-  llm: ILLM;
-  abortController: AbortController;
-  input: string;
-  language: string | undefined;
-  overridePrompt: ChatMessage[] | undefined;
-  rulesToInclude: RuleWithSource[] | undefined;
-}): AsyncGenerator<DiffLine> {
+export async function* streamDiffLines(
+  options: StreamDiffLinesPayload,
+  llm: ILLM,
+  abortController: AbortController,
+  overridePrompt: ChatMessage[] | undefined,
+  rulesToInclude: RuleWithSource[] | undefined,
+): AsyncGenerator<DiffLine> {
+  const { type, prefix, highlighted, suffix, input, language } = options;
+
   void Telemetry.capture(
     "inlineEdit",
     {
@@ -104,7 +108,9 @@ export async function* streamDiffLines({
   // For apply can be overridden with simply apply prompt
   let prompt =
     overridePrompt ??
-    constructEditPrompt(prefix, highlighted, suffix, llm, input, language);
+    (type === "apply"
+      ? constructApplyPrompt(oldLines.join("\n"), options.newCode, llm)
+      : constructEditPrompt(prefix, highlighted, suffix, llm, input, language));

   // Rules can be included with edit prompt
   // If any rules are present this will result in using chat instead of legacy completion
@@ -159,7 +165,13 @@
     content: highlighted,
   };

-  const completion = recursiveStream(llm, abortController, prompt, prediction);
+  const completion = recursiveStream(
+    llm,
+    abortController,
+    type,
+    prompt,
+    prediction,
+  );

   let lines = streamLines(completion);

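For orientation, streamDiffLines ultimately yields DiffLine objects that the editor renders as an inline diff; in the new apply branch the prompt is built from the old selection plus options.newCode instead of the user's edit instruction. The sketch below fakes such a stream just to show the consumption pattern; the DiffLine shape is modeled on Continue's diff utilities ("same" | "old" | "new" plus the line text) and the example lines are invented.

// Hypothetical consumer of a DiffLine stream; the stream itself is faked so
// the example runs on its own.
type DiffLine = { type: "same" | "old" | "new"; line: string };

async function* fakeDiffStream(): AsyncGenerator<DiffLine> {
  yield { type: "same", line: "function greet(name: string) {" };
  yield { type: "old", line: '  console.log("hi " + name);' };
  yield { type: "new", line: "  console.log(`hi ${name}`);" };
  yield { type: "same", line: "}" };
}

async function main(): Promise<void> {
  for await (const d of fakeDiffStream()) {
    const prefix = d.type === "new" ? "+" : d.type === "old" ? "-" : " ";
    console.log(prefix + d.line);
  }
}

void main();
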
core/index.d.ts

Lines changed: 16 additions & 1 deletion
@@ -1393,7 +1393,9 @@ export interface ApplyState {
   autoFormattingDiff?: string;
 }

-export interface StreamDiffLinesPayload {
+export type StreamDiffLinesType = "edit" | "apply";
+interface StreamDiffLinesOptionsBase {
+  type: StreamDiffLinesType;
   prefix: string;
   highlighted: string;
   suffix: string;
@@ -1404,6 +1406,19 @@
   fileUri?: string;
 }

+interface StreamDiffLinesOptionsEdit extends StreamDiffLinesOptionsBase {
+  type: "edit";
+}
+
+interface StreamDiffLinesOptionsApply extends StreamDiffLinesOptionsBase {
+  type: "apply";
+  newCode: string;
+}
+
+type StreamDiffLinesPayload =
+  | StreamDiffLinesOptionsApply
+  | StreamDiffLinesOptionsEdit;
+
 export interface HighlightedCodePayload {
   rangeInFileWithContents: RangeInFileWithContents;
   prompt?: string;
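
The payload is now a discriminated union keyed on type, which is what lets streamDiffLines read newCode only in the apply branch. A standalone sketch of that narrowing follows, restating the relevant pieces locally (base fields beyond those shown in the hunk are omitted; the interface names and sample values here are illustrative, not the exported ones).

// Local restatement of the union for illustration; see core/index.d.ts for the
// real definitions.
type StreamDiffLinesType = "edit" | "apply";

interface OptionsBase {
  type: StreamDiffLinesType;
  prefix: string;
  highlighted: string;
  suffix: string;
}

interface OptionsEdit extends OptionsBase {
  type: "edit";
}

interface OptionsApply extends OptionsBase {
  type: "apply";
  newCode: string;
}

type Payload = OptionsApply | OptionsEdit;

// Narrowing on `type` exposes `newCode` only for apply payloads.
function describe(payload: Payload): string {
  if (payload.type === "apply") {
    return `apply: replace selection with ${payload.newCode.length} chars`;
  }
  return "edit: rewrite the selection from the user instruction";
}

console.log(
  describe({ type: "apply", prefix: "", highlighted: "old()", suffix: "", newCode: "new()" }),
);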

core/llm/llms/Inception.ts

Lines changed: 63 additions & 36 deletions
@@ -2,6 +2,7 @@ import { streamSse } from "@continuedev/fetch";
 import { ChatMessage, CompletionOptions, LLMOptions } from "../../index.js";

 import { ChatCompletionCreateParams } from "@continuedev/openai-adapters";
+import { APPLY_UNIQUE_TOKEN } from "../../edit/constants.js";
 import { UNIQUE_TOKEN } from "../../nextEdit/constants.js";
 import OpenAI from "./OpenAI.js";

@@ -96,40 +97,21 @@ class Inception extends OpenAI {
     options: CompletionOptions,
   ): AsyncGenerator<ChatMessage> {
     if (this.isNextEdit(messages)) {
-      messages = this.removeNextEditToken(messages);
-
-      // Use edit/completions endpoint.
-      const endpoint = new URL("edit/completions", this.apiBase);
-
-      const resp = await this.fetch(endpoint, {
-        method: "POST",
-        body: JSON.stringify({
-          model: options.model,
-          messages: messages,
-          max_tokens: options.maxTokens,
-          temperature: options.temperature,
-          top_p: options.topP,
-          frequency_penalty: options.frequencyPenalty,
-          presence_penalty: options.presencePenalty,
-          stop: options.stop,
-          stream: true,
-        }),
-        headers: {
-          "Content-Type": "application/json",
-          Accept: "application/json",
-          Authorization: `Bearer ${this.apiKey}`,
-        },
+      messages = this.removeToken(messages, UNIQUE_TOKEN);
+      yield* this.streamSpecialEndpoint(
+        "edit/completions",
+        messages,
         signal,
-      });
-
-      for await (const chunk of streamSse(resp)) {
-        if (chunk.choices?.[0]?.delta?.content) {
-          yield {
-            role: "assistant",
-            content: chunk.choices[0].delta.content,
-          };
-        }
-      }
+        options,
+      );
+    } else if (this.isApply(messages)) {
+      messages = this.removeToken(messages, APPLY_UNIQUE_TOKEN);
+      yield* this.streamSpecialEndpoint(
+        "apply/completions",
+        messages,
+        signal,
+        options,
+      );
     } else {
       // Use regular chat/completions endpoint - call parent OpenAI implementation.
       yield* super._streamChat(messages, signal, options);
@@ -145,23 +127,68 @@
     );
   }

-  private removeNextEditToken(messages: ChatMessage[]): ChatMessage[] {
+  private isApply(messages: ChatMessage[]): boolean {
+    return messages.some(
+      (message) =>
+        typeof message.content === "string" &&
+        message.content.endsWith(APPLY_UNIQUE_TOKEN),
+    );
+  }
+
+  private removeToken(messages: ChatMessage[], token: string): ChatMessage[] {
     const lastMessage = messages[messages.length - 1];

     if (
       typeof lastMessage?.content === "string" &&
-      lastMessage.content.endsWith(UNIQUE_TOKEN)
+      lastMessage.content.endsWith(token)
     ) {
       const cleanedMessages = [...messages];
       cleanedMessages[cleanedMessages.length - 1] = {
         ...lastMessage,
-        content: lastMessage.content.slice(0, -UNIQUE_TOKEN.length),
+        content: lastMessage.content.slice(0, -token.length),
       };
       return cleanedMessages;
     }

     return messages;
   }
+
+  private async *streamSpecialEndpoint(
+    path: string,
+    messages: ChatMessage[],
+    signal: AbortSignal,
+    options: CompletionOptions,
+  ): AsyncGenerator<ChatMessage> {
+    const endpoint = new URL(path, this.apiBase);
+
+    const resp = await this.fetch(endpoint, {
+      method: "POST",
+      body: JSON.stringify({
+        model: options.model,
+        messages,
+        max_tokens: options.maxTokens,
+        temperature: options.temperature,
+        top_p: options.topP,
+        frequency_penalty: options.frequencyPenalty,
+        presence_penalty: options.presencePenalty,
+        stop: options.stop,
+        stream: true,
+      }),
+      headers: {
+        "Content-Type": "application/json",
+        Accept: "application/json",
+        Authorization: `Bearer ${this.apiKey}`,
+      },
+      signal,
+    });
+
+    for await (const chunk of streamSse(resp)) {
+      const content = chunk.choices?.[0]?.delta?.content;
+      if (content) {
+        yield { role: "assistant", content };
+      }
+    }
+  }
 }

 export default Inception;
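
The provider-side effect of the change is endpoint routing: a trailing sentinel on a message decides whether the request is streamed from edit/completions, the new apply/completions, or plain chat/completions. The sketch below mirrors that branch with local stand-ins; APPLY_UNIQUE_TOKEN matches core/edit/constants.ts, while NEXT_EDIT_TOKEN is a placeholder for the unshown UNIQUE_TOKEN from core/nextEdit/constants.ts, and the detection helper is simplified from isApply.

// Simplified routing sketch; ChatMessage is a local stand-in and the next-edit
// token value is a placeholder, not the real constant.
type ChatMessage = { role: string; content: unknown };

const APPLY_UNIQUE_TOKEN = "<|!@#IS_APPLY!@#|>";
const NEXT_EDIT_TOKEN = "<|NEXT_EDIT_PLACEHOLDER|>"; // hypothetical value

function hasTrailingToken(messages: ChatMessage[], token: string): boolean {
  return messages.some(
    (m) => typeof m.content === "string" && m.content.endsWith(token),
  );
}

// Chooses which Inception endpoint a request would be streamed from.
function pickEndpoint(messages: ChatMessage[]): string {
  if (hasTrailingToken(messages, NEXT_EDIT_TOKEN)) return "edit/completions";
  if (hasTrailingToken(messages, APPLY_UNIQUE_TOKEN)) return "apply/completions";
  return "chat/completions";
}

console.log(
  pickEndpoint([{ role: "user", content: "merge this" + APPLY_UNIQUE_TOKEN }]),
); // "apply/completions"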
