
Commit d78fc99

feat: add Mercury Apply endpoint support to Inception LLM (#7827)
* feat: add Mercury Apply endpoint support to Inception LLM
* refactor: begin to clean up the apply / streamDiff code
* fix: undo commenting out of instant apply
* fix: use underlyingProviderName
1 parent 02465b6 commit d78fc99

File tree

19 files changed (+337, -164 lines)


core/core.ts

Lines changed: 6 additions & 13 deletions
@@ -17,8 +17,8 @@ import { DataLogger } from "./data/log";
 import { CodebaseIndexer } from "./indexing/CodebaseIndexer";
 import DocsService from "./indexing/docs/DocsService";
 import { countTokens } from "./llm/countTokens";
-import Ollama from "./llm/llms/Ollama";
 import Lemonade from "./llm/llms/Lemonade";
+import Ollama from "./llm/llms/Ollama";
 import { EditAggregator } from "./nextEdit/context/aggregateEdits";
 import { createNewPromptFileV2 } from "./promptFiles/createNewPromptFile";
 import { callTool } from "./tools/callTool";
@@ -729,20 +729,13 @@ export class Core {
        data.fileUri ?? "current-file-stream",
      ); // not super important since currently cancelling apply will cancel all streams it's one file at a time

-      return streamDiffLines({
-        highlighted: data.highlighted,
-        prefix: data.prefix,
-        suffix: data.suffix,
+      return streamDiffLines(
+        data,
        llm,
-        // rules included for edit, NOT apply
-        rulesToInclude: data.includeRulesInSystemMessage
-          ? config.rules
-          : undefined,
-        input: data.input,
-        language: data.language,
-        overridePrompt: undefined,
        abortController,
-      });
+        undefined,
+        data.includeRulesInSystemMessage ? config.rules : undefined,
+      );
    });

    on("cancelApply", async (msg) => {

core/edit/constants.ts

Lines changed: 2 additions & 0 deletions
@@ -1 +1,3 @@
 export const EDIT_MODE_STREAM_ID = "edit-mode";
+
+export const APPLY_UNIQUE_TOKEN = "<|!@#IS_APPLY!@#|>";

core/edit/recursiveStream.ts

Lines changed: 51 additions & 3 deletions
@@ -1,7 +1,14 @@
-import { ChatMessage, ILLM, Prediction, PromptLog } from "..";
+import {
+  ChatMessage,
+  ILLM,
+  Prediction,
+  PromptLog,
+  type StreamDiffLinesType,
+} from "..";
 import { DEFAULT_MAX_TOKENS } from "../llm/constants";
 import { countTokens } from "../llm/countTokens";
 import { renderChatMessage } from "../util/messageContent";
+import { APPLY_UNIQUE_TOKEN } from "./constants.js";

 const INFINITE_STREAM_SAFETY = 0.9;

@@ -17,6 +24,7 @@ const RECURSIVE_PROMPT = `Continue EXACTLY where you left`;
 export async function* recursiveStream(
   llm: ILLM,
   abortController: AbortController,
+  type: StreamDiffLinesType,
   prompt: ChatMessage[] | string,
   prediction: Prediction | undefined,
   currentBuffer = "",
@@ -28,8 +36,11 @@
   let buffer = currentBuffer;
   // let whiteSpaceAtEndOfBuffer = buffer.match(/\s*$/)?.[0] ?? ""; // attempts at fixing whitespace bug with recursive boundaries

+  const injectApplyToken = type === "apply" && shouldInjectApplyToken(llm);
   if (typeof prompt === "string") {
-    const generator = llm.streamComplete(prompt, abortController.signal, {
+    const finalPrompt = injectApplyToken ? prompt + APPLY_UNIQUE_TOKEN : prompt;
+
+    const generator = llm.streamComplete(finalPrompt, abortController.signal, {
       raw: true,
       prediction: undefined,
       reasoning: false,
@@ -62,7 +73,11 @@
       }
     }
   } else {
-    const generator = llm.streamChat(prompt, abortController.signal, {
+    const promptMessages = injectApplyToken
+      ? appendTokenToLastMessage(prompt, APPLY_UNIQUE_TOKEN)
+      : prompt;
+
+    const generator = llm.streamChat(promptMessages, abortController.signal, {
       raw: true,
       prediction: undefined,
       reasoning: false,
@@ -104,3 +119,36 @@
     }
   }
 }
+
+function shouldInjectApplyToken(llm: ILLM): boolean {
+  const model = llm.model?.toLowerCase() ?? "";
+  return (
+    llm.underlyingProviderName === "inception" && model.includes("mercury")
+  );
+}
+
+function appendTokenToLastMessage(
+  messages: ChatMessage[],
+  token: string,
+): ChatMessage[] {
+  if (messages.length === 0) {
+    return messages;
+  }
+
+  const lastMessage = messages[messages.length - 1];
+  if (typeof lastMessage.content !== "string") {
+    return messages;
+  }
+
+  if (lastMessage.content.endsWith(token)) {
+    return messages;
+  }
+
+  const updatedMessages = [...messages];
+  updatedMessages[updatedMessages.length - 1] = {
+    ...lastMessage,
+    content: lastMessage.content + token,
+  };
+
+  return updatedMessages;
+}
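
Taken on their own, the two helpers added here implement a simple tagging scheme: when the stream is an apply request and the underlying provider is Inception serving a Mercury model, the sentinel token is appended to the prompt (or to the last chat message) so it can be recognized and stripped again further down the stack. A self-contained sketch of that round trip follows, with ChatMessage reduced to a local stand-in type; the helper names here are not the repository's.

// Standalone sketch of the apply-token round trip; ChatMessage is a minimal
// stand-in for the real interface in core/index.d.ts.
type ChatMessage = { role: "user" | "assistant" | "system"; content: string };

const APPLY_UNIQUE_TOKEN = "<|!@#IS_APPLY!@#|>";

// Mirrors appendTokenToLastMessage: tag the last message without mutating the input.
function appendToken(messages: ChatMessage[], token: string): ChatMessage[] {
  if (messages.length === 0) return messages;
  const last = messages[messages.length - 1];
  if (last.content.endsWith(token)) return messages;
  return [...messages.slice(0, -1), { ...last, content: last.content + token }];
}

// Mirrors Inception.removeToken: strip the tag before the request goes upstream.
function stripToken(messages: ChatMessage[], token: string): ChatMessage[] {
  const last = messages[messages.length - 1];
  if (!last?.content.endsWith(token)) return messages;
  return [
    ...messages.slice(0, -1),
    { ...last, content: last.content.slice(0, -token.length) },
  ];
}

const tagged = appendToken(
  [{ role: "user", content: "apply this edit" }],
  APPLY_UNIQUE_TOKEN,
);
console.log(tagged[0].content.endsWith(APPLY_UNIQUE_TOKEN)); // true
console.log(stripToken(tagged, APPLY_UNIQUE_TOKEN)[0].content); // "apply this edit"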

core/edit/streamDiffLines.ts

Lines changed: 35 additions & 23 deletions
@@ -4,6 +4,7 @@ import {
   ILLM,
   Prediction,
   RuleWithSource,
+  StreamDiffLinesPayload,
   ToolResultChatMessage,
   UserChatMessage,
 } from "../";
@@ -20,6 +21,7 @@ import { streamDiff } from "../diff/streamDiff";
 import { streamLines } from "../diff/util";
 import { getSystemMessageWithRules } from "../llm/rules/getSystemMessageWithRules";
 import { gptEditPrompt } from "../llm/templates/edit";
+import { defaultApplyPrompt } from "../llm/templates/edit/gpt";
 import { findLast } from "../util/findLast";
 import { Telemetry } from "../util/posthog";
 import { recursiveStream } from "./recursiveStream";
@@ -42,6 +44,20 @@ function constructEditPrompt(
   });
 }

+function constructApplyPrompt(
+  originalCode: string,
+  newCode: string,
+  llm: ILLM,
+) {
+  const template = llm.promptTemplates?.apply ?? defaultApplyPrompt;
+  const rendered = llm.renderPromptTemplate(template, [], {
+    original_code: originalCode,
+    new_code: newCode,
+  });
+
+  return rendered;
+}
+
 export async function* addIndentation(
   diffLineGenerator: AsyncGenerator<DiffLine>,
   indentation: string,
@@ -58,27 +74,15 @@ function modelIsInept(model: string): boolean {
   return !(model.includes("gpt") || model.includes("claude"));
 }

-export async function* streamDiffLines({
-  prefix,
-  highlighted,
-  suffix,
-  llm,
-  abortController,
-  input,
-  language,
-  overridePrompt,
-  rulesToInclude,
-}: {
-  prefix: string;
-  highlighted: string;
-  suffix: string;
-  llm: ILLM;
-  abortController: AbortController;
-  input: string;
-  language: string | undefined;
-  overridePrompt: ChatMessage[] | undefined;
-  rulesToInclude: RuleWithSource[] | undefined;
-}): AsyncGenerator<DiffLine> {
+export async function* streamDiffLines(
+  options: StreamDiffLinesPayload,
+  llm: ILLM,
+  abortController: AbortController,
+  overridePrompt: ChatMessage[] | undefined,
+  rulesToInclude: RuleWithSource[] | undefined,
+): AsyncGenerator<DiffLine> {
+  const { type, prefix, highlighted, suffix, input, language } = options;
+
   void Telemetry.capture(
     "inlineEdit",
     {
@@ -104,7 +108,9 @@ export async function* streamDiffLines({
   // For apply can be overridden with simply apply prompt
   let prompt =
     overridePrompt ??
-    constructEditPrompt(prefix, highlighted, suffix, llm, input, language);
+    (type === "apply"
+      ? constructApplyPrompt(oldLines.join("\n"), options.newCode, llm)
+      : constructEditPrompt(prefix, highlighted, suffix, llm, input, language));

   // Rules can be included with edit prompt
   // If any rules are present this will result in using chat instead of legacy completion
@@ -159,7 +165,13 @@
     content: highlighted,
   };

-  const completion = recursiveStream(llm, abortController, prompt, prediction);
+  const completion = recursiveStream(
+    llm,
+    abortController,
+    type,
+    prompt,
+    prediction,
+  );

   let lines = streamLines(completion);

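For orientation, streamDiffLines ultimately yields DiffLine objects that the editor renders as an inline diff; in the new apply branch the prompt is built from the old selection plus options.newCode instead of the user's edit instruction. The sketch below fakes such a stream just to show the consumption pattern; the DiffLine shape is modeled on Continue's diff utilities ("same" | "old" | "new" plus the line text) and the example lines are invented.

// Hypothetical consumer of a DiffLine stream; the stream itself is faked so
// the example runs on its own.
type DiffLine = { type: "same" | "old" | "new"; line: string };

async function* fakeDiffStream(): AsyncGenerator<DiffLine> {
  yield { type: "same", line: "function greet(name: string) {" };
  yield { type: "old", line: '  console.log("hi " + name);' };
  yield { type: "new", line: "  console.log(`hi ${name}`);" };
  yield { type: "same", line: "}" };
}

async function main(): Promise<void> {
  for await (const d of fakeDiffStream()) {
    const prefix = d.type === "new" ? "+" : d.type === "old" ? "-" : " ";
    console.log(prefix + d.line);
  }
}

void main();
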
core/index.d.ts

Lines changed: 16 additions & 1 deletion
@@ -1393,7 +1393,9 @@ export interface ApplyState {
   autoFormattingDiff?: string;
 }

-export interface StreamDiffLinesPayload {
+export type StreamDiffLinesType = "edit" | "apply";
+interface StreamDiffLinesOptionsBase {
+  type: StreamDiffLinesType;
   prefix: string;
   highlighted: string;
   suffix: string;
@@ -1404,6 +1406,19 @@
   fileUri?: string;
 }

+interface StreamDiffLinesOptionsEdit extends StreamDiffLinesOptionsBase {
+  type: "edit";
+}
+
+interface StreamDiffLinesOptionsApply extends StreamDiffLinesOptionsBase {
+  type: "apply";
+  newCode: string;
+}
+
+type StreamDiffLinesPayload =
+  | StreamDiffLinesOptionsApply
+  | StreamDiffLinesOptionsEdit;
+
 export interface HighlightedCodePayload {
   rangeInFileWithContents: RangeInFileWithContents;
   prompt?: string;
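
The payload is now a discriminated union keyed on type, which is what lets streamDiffLines read newCode only in the apply branch. A standalone sketch of that narrowing follows, restating the relevant pieces locally (base fields beyond those shown in the hunk are omitted; the interface names and sample values here are illustrative, not the exported ones).

// Local restatement of the union for illustration; see core/index.d.ts for the
// real definitions.
type StreamDiffLinesType = "edit" | "apply";

interface OptionsBase {
  type: StreamDiffLinesType;
  prefix: string;
  highlighted: string;
  suffix: string;
}

interface OptionsEdit extends OptionsBase {
  type: "edit";
}

interface OptionsApply extends OptionsBase {
  type: "apply";
  newCode: string;
}

type Payload = OptionsApply | OptionsEdit;

// Narrowing on `type` exposes `newCode` only for apply payloads.
function describe(payload: Payload): string {
  if (payload.type === "apply") {
    return `apply: replace selection with ${payload.newCode.length} chars`;
  }
  return "edit: rewrite the selection from the user instruction";
}

console.log(
  describe({ type: "apply", prefix: "", highlighted: "old()", suffix: "", newCode: "new()" }),
);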

core/llm/llms/Inception.ts

Lines changed: 63 additions & 36 deletions
@@ -2,6 +2,7 @@ import { streamSse } from "@continuedev/fetch";
 import { ChatMessage, CompletionOptions, LLMOptions } from "../../index.js";

 import { ChatCompletionCreateParams } from "@continuedev/openai-adapters";
+import { APPLY_UNIQUE_TOKEN } from "../../edit/constants.js";
 import { UNIQUE_TOKEN } from "../../nextEdit/constants.js";
 import OpenAI from "./OpenAI.js";

@@ -96,40 +97,21 @@ class Inception extends OpenAI {
     options: CompletionOptions,
   ): AsyncGenerator<ChatMessage> {
     if (this.isNextEdit(messages)) {
-      messages = this.removeNextEditToken(messages);
-
-      // Use edit/completions endpoint.
-      const endpoint = new URL("edit/completions", this.apiBase);
-
-      const resp = await this.fetch(endpoint, {
-        method: "POST",
-        body: JSON.stringify({
-          model: options.model,
-          messages: messages,
-          max_tokens: options.maxTokens,
-          temperature: options.temperature,
-          top_p: options.topP,
-          frequency_penalty: options.frequencyPenalty,
-          presence_penalty: options.presencePenalty,
-          stop: options.stop,
-          stream: true,
-        }),
-        headers: {
-          "Content-Type": "application/json",
-          Accept: "application/json",
-          Authorization: `Bearer ${this.apiKey}`,
-        },
+      messages = this.removeToken(messages, UNIQUE_TOKEN);
+      yield* this.streamSpecialEndpoint(
+        "edit/completions",
+        messages,
         signal,
-      });
-
-      for await (const chunk of streamSse(resp)) {
-        if (chunk.choices?.[0]?.delta?.content) {
-          yield {
-            role: "assistant",
-            content: chunk.choices[0].delta.content,
-          };
-        }
-      }
+        options,
+      );
+    } else if (this.isApply(messages)) {
+      messages = this.removeToken(messages, APPLY_UNIQUE_TOKEN);
+      yield* this.streamSpecialEndpoint(
+        "apply/completions",
+        messages,
+        signal,
+        options,
+      );
     } else {
       // Use regular chat/completions endpoint - call parent OpenAI implementation.
       yield* super._streamChat(messages, signal, options);
@@ -145,23 +127,68 @@
     );
   }

-  private removeNextEditToken(messages: ChatMessage[]): ChatMessage[] {
+  private isApply(messages: ChatMessage[]): boolean {
+    return messages.some(
+      (message) =>
+        typeof message.content === "string" &&
+        message.content.endsWith(APPLY_UNIQUE_TOKEN),
+    );
+  }
+
+  private removeToken(messages: ChatMessage[], token: string): ChatMessage[] {
     const lastMessage = messages[messages.length - 1];

     if (
       typeof lastMessage?.content === "string" &&
-      lastMessage.content.endsWith(UNIQUE_TOKEN)
+      lastMessage.content.endsWith(token)
     ) {
       const cleanedMessages = [...messages];
       cleanedMessages[cleanedMessages.length - 1] = {
         ...lastMessage,
-        content: lastMessage.content.slice(0, -UNIQUE_TOKEN.length),
+        content: lastMessage.content.slice(0, -token.length),
       };
       return cleanedMessages;
     }

     return messages;
   }
+
+  private async *streamSpecialEndpoint(
+    path: string,
+    messages: ChatMessage[],
+    signal: AbortSignal,
+    options: CompletionOptions,
+  ): AsyncGenerator<ChatMessage> {
+    const endpoint = new URL(path, this.apiBase);
+
+    const resp = await this.fetch(endpoint, {
+      method: "POST",
+      body: JSON.stringify({
+        model: options.model,
+        messages,
+        max_tokens: options.maxTokens,
+        temperature: options.temperature,
+        top_p: options.topP,
+        frequency_penalty: options.frequencyPenalty,
+        presence_penalty: options.presencePenalty,
+        stop: options.stop,
+        stream: true,
+      }),
+      headers: {
+        "Content-Type": "application/json",
+        Accept: "application/json",
+        Authorization: `Bearer ${this.apiKey}`,
+      },
+      signal,
+    });
+
+    for await (const chunk of streamSse(resp)) {
+      const content = chunk.choices?.[0]?.delta?.content;
+      if (content) {
+        yield { role: "assistant", content };
+      }
+    }
+  }
 }

 export default Inception;
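
The provider-side effect of the change is endpoint routing: a trailing sentinel on a message decides whether the request is streamed from edit/completions, the new apply/completions, or plain chat/completions. The sketch below mirrors that branch with local stand-ins; APPLY_UNIQUE_TOKEN matches core/edit/constants.ts, while NEXT_EDIT_TOKEN is a placeholder for the unshown UNIQUE_TOKEN from core/nextEdit/constants.ts, and the detection helper is simplified from isApply.

// Simplified routing sketch; ChatMessage is a local stand-in and the next-edit
// token value is a placeholder, not the real constant.
type ChatMessage = { role: string; content: unknown };

const APPLY_UNIQUE_TOKEN = "<|!@#IS_APPLY!@#|>";
const NEXT_EDIT_TOKEN = "<|NEXT_EDIT_PLACEHOLDER|>"; // hypothetical value

function hasTrailingToken(messages: ChatMessage[], token: string): boolean {
  return messages.some(
    (m) => typeof m.content === "string" && m.content.endsWith(token),
  );
}

// Chooses which Inception endpoint a request would be streamed from.
function pickEndpoint(messages: ChatMessage[]): string {
  if (hasTrailingToken(messages, NEXT_EDIT_TOKEN)) return "edit/completions";
  if (hasTrailingToken(messages, APPLY_UNIQUE_TOKEN)) return "apply/completions";
  return "chat/completions";
}

console.log(
  pickEndpoint([{ role: "user", content: "merge this" + APPLY_UNIQUE_TOKEN }]),
); // "apply/completions"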
