innFactory · pull · Apr 27, 2026 · Apr 26, 2026
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@librechat/agents",
-  "version": "3.1.72",
+  "version": "3.1.73",
   "main": "./dist/cjs/main.cjs",
   "module": "./dist/esm/main.mjs",
   "types": "./dist/types/index.d.ts",

diff --git a/src/graphs/Graph.ts b/src/graphs/Graph.ts
@@ -898,10 +898,21 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
       if (
         isThinkingEnabled(agentContext.provider, agentContext.clientOptions)
       ) {
+        /**
+         * Pass `this.startIndex` so the function can distinguish CURRENT-run
+         * AI messages (the agent's own iterations — possibly without a
+         * leading thinking block, which Claude is allowed to skip) from
+         * historical context that genuinely needs the
+         * `[Previous agent context]` placeholder. Without this signal the
+         * function would convert the agent's own in-run tool_use messages,
+         * polluting the next iteration's prompt with a placeholder the
+         * model treats as suspicious injected content.
+         */
         finalMessages = ensureThinkingBlockInMessages(
           finalMessages,
           agentContext.provider,
-          config
+          config,
+          this.startIndex
         );
       }
 

diff --git a/src/messages/ensureThinkingBlock.test.ts b/src/messages/ensureThinkingBlock.test.ts
@@ -1209,4 +1209,171 @@ describe('ensureThinkingBlockInMessages', () => {
       expect(outputImageBlock).not.toBe(originalImageBlock);
     });
   });
+
+  describe('runStartIndex (current-run boundary)', () => {
+    /**
+     * Claude is allowed to skip a thinking block before a tool_use (cf.
+     * PR #116). When the agent's own first iteration produces an
+     * `AI(tool_use, no thinking)`, the function would otherwise convert
+     * it to a `[Previous agent context]` HumanMessage — polluting the
+     * next iteration's prompt with text the model treats as suspicious
+     * injected content. The model then ignores its own real prior tool
+     * result and re-runs the tool to verify, often failing because the
+     * subsequent sandbox doesn't have the file.
+     *
+     * The `runStartIndex` parameter tells the function which messages
+     * are the agent's own in-run work: those at or after it must NEVER
+     * be converted, even if no thinking block appears in the chain.
+     */
+
+    test('preserves the agent first-iteration AI(tool_use) when its index is at runStartIndex', () => {
+      const messages = [
+        new HumanMessage({ content: 'fetch the data' }),
+        // No thinking block — Claude validly skipped it before tool_use
+        new AIMessage({
+          content: '',
+          tool_calls: [
+            { id: 'c1', name: 'fetch', args: {}, type: 'tool_call' as const },
+          ],
+        }),
+        new ToolMessage({ content: 'data', tool_call_id: 'c1' }),
+      ];
+
+      const result = ensureThinkingBlockInMessages(
+        messages,
+        Providers.BEDROCK,
+        undefined,
+        /* runStartIndex */ 1
+      );
+
+      // All 3 preserved — the AI at index 1 is the agent's own work
+      expect(result).toHaveLength(3);
+      expect(result[1]).toBeInstanceOf(AIMessage);
+      expect((result[1] as AIMessage).tool_calls).toHaveLength(1);
+      expect(result[2]).toBeInstanceOf(ToolMessage);
+      // No placeholder leaked in
+      expect(getTextContent(result[1])).not.toContain(
+        '[Previous agent context]'
+      );
+    });
+
+    test('preserves multiple in-run AI(tool_use) iterations without thinking blocks', () => {
+      const messages = [
+        new HumanMessage({ content: 'do work' }),
+        new AIMessage({
+          content: '',
+          tool_calls: [
+            { id: 'c1', name: 'step1', args: {}, type: 'tool_call' as const },
+          ],
+        }),
+        new ToolMessage({ content: 'r1', tool_call_id: 'c1' }),
+        new AIMessage({
+          content: '',
+          tool_calls: [
+            { id: 'c2', name: 'step2', args: {}, type: 'tool_call' as const },
+          ],
+        }),
+        new ToolMessage({ content: 'r2', tool_call_id: 'c2' }),
+      ];
+
+      const result = ensureThinkingBlockInMessages(
+        messages,
+        Providers.BEDROCK,
+        undefined,
+        /* runStartIndex */ 1
+      );
+
+      expect(result).toHaveLength(5);
+      expect(result[1]).toBeInstanceOf(AIMessage);
+      expect(result[3]).toBeInstanceOf(AIMessage);
+      // Neither AI was converted
+      expect(getTextContent(result[1])).not.toContain(
+        '[Previous agent context]'
+      );
+      expect(getTextContent(result[3])).not.toContain(
+        '[Previous agent context]'
+      );
+    });
+
+    test('still converts pre-runStartIndex history that lacks thinking blocks', () => {
+      // Real handoff scenario: a prior non-thinking agent's tool calls
+      // appear before this run started. They genuinely need the
+      // placeholder (the legacy reason this function exists).
+      const messages = [
+        new HumanMessage({ content: 'first request' }),
+        new AIMessage({
+          content: 'using tool',
+          tool_calls: [
+            { id: 'old', name: 'legacy', args: {}, type: 'tool_call' as const },
+          ],
+        }),
+        new ToolMessage({ content: 'old result', tool_call_id: 'old' }),
+        // Current run starts at index 3 (say after a handoff). Nothing is
+        // appended yet, so every message above is pre-run history.
+      ];
+
+      const result = ensureThinkingBlockInMessages(
+        messages,
+        Providers.BEDROCK,
+        undefined,
+        /* runStartIndex */ 3
+      );
+
+      // The pre-run AI(tool_use)+Tool got converted to a placeholder
+      expect(result).toHaveLength(2);
+      expect(result[0]).toBeInstanceOf(HumanMessage);
+      expect(result[1]).toBeInstanceOf(HumanMessage);
+      expect(getTextContent(result[1])).toContain('[Previous agent context]');
+    });
+
+    test('falls back to chainHasThinkingBlock heuristic when runStartIndex is undefined (backward compat)', () => {
+      const messages = [
+        new HumanMessage({ content: 'do work' }),
+        // No reasoning + no runStartIndex hint → still gets converted
+        // (preserves the prior behavior for callers that haven't been
+        // updated to pass the boundary).
+        new AIMessage({
+          content: 'using tool',
+          tool_calls: [
+            { id: 'c1', name: 'tool', args: {}, type: 'tool_call' as const },
+          ],
+        }),
+        new ToolMessage({ content: 'r', tool_call_id: 'c1' }),
+      ];
+
+      const result = ensureThinkingBlockInMessages(messages, Providers.BEDROCK);
+
+      expect(result).toHaveLength(2);
+      expect(result[1]).toBeInstanceOf(HumanMessage);
+      expect(getTextContent(result[1])).toContain('[Previous agent context]');
+    });
+
+    test('runStartIndex of 0 is honored (whole array is the current run)', () => {
+      // Edge: a fresh run with no prior history at all. Everything is
+      // in-run and must be preserved even without thinking blocks.
+      const messages = [
+        new HumanMessage({ content: 'do work' }),
+        new AIMessage({
+          content: '',
+          tool_calls: [
+            { id: 'c1', name: 'tool', args: {}, type: 'tool_call' as const },
+          ],
+        }),
+        new ToolMessage({ content: 'r', tool_call_id: 'c1' }),
+      ];
+
+      const result = ensureThinkingBlockInMessages(
+        messages,
+        Providers.BEDROCK,
+        undefined,
+        /* runStartIndex */ 0
+      );
+
+      expect(result).toHaveLength(3);
+      expect(result[1]).toBeInstanceOf(AIMessage);
+      expect(getTextContent(result[1])).not.toContain(
+        '[Previous agent context]'
+      );
+    });
+  });
 });
diff --git a/src/messages/format.ts b/src/messages/format.ts
@@ -1391,12 +1391,23 @@ function appendToolCalls(
  * @param messages - Array of messages to process
  * @param provider - The provider being used (unused but kept for future compatibility)
  * @param config - Optional RunnableConfig for structured agent logging
+ * @param runStartIndex - Index in `messages` at which the CURRENT run's
+ *   own appended AI/Tool messages begin; everything at this index or
+ *   later was produced by this run's iterations, not by historical
+ *   context. When provided (`0` is a valid value), an AI message at or
+ *   after this index that has tool calls but no thinking block is kept
+ *   as-is instead of being converted to a `[Previous agent context]`
+ *   placeholder — Claude can validly skip a thinking block before a
+ *   tool_use (cf. PR #116). When omitted, the function falls back to its
+ *   prior heuristic (`chainHasThinkingBlock`), preserving backward
+ *   compatibility for callers that don't yet pass the boundary.
  * @returns The messages array with tool sequences converted to buffer strings if necessary
  */
 export function ensureThinkingBlockInMessages(
   messages: BaseMessage[],
   _provider: Providers,
-  config?: RunnableConfig
+  config?: RunnableConfig,
+  runStartIndex?: number
 ): BaseMessage[] {
   if (messages.length === 0) {
     return messages;
@@ -1483,6 +1494,23 @@ export function ensureThinkingBlockInMessages(
     // but follow-ups have content: "" with only tool_calls. These are the
     // same agent's turn and must NOT be converted to HumanMessages.
     if (hasToolUse && !hasThinkingBlock) {
+      // Current-run boundary check: anything at or after `runStartIndex`
+      // is the current run's own work — preserve it. Claude is allowed
+      // to skip a thinking block before a tool_use (cf. PR #116 in the
+      // agents repo), so the agent's own first-iteration AI message can
+      // legitimately have tool_calls without reasoning. Converting it to
+      // a `[Previous agent context]` placeholder pollutes the next
+      // iteration's prompt — the LLM sees the placeholder, treats it as
+      // suspicious injected content, ignores its own real prior tool
+      // result, and re-runs the tool to verify (which then often fails
+      // because subsequent calls land in fresh sandboxes without the
+      // file). Skip the conversion when we know this is in-run.
+      if (runStartIndex !== undefined && i >= runStartIndex) {
+        result.push(msg);
+        i++;
+        continue;
+      }
+
       // Walk backwards — if an earlier AI message in the same chain (before
       // the nearest HumanMessage) has a thinking/reasoning block, this is a
       // continuation of a thinking-enabled turn, not a non-thinking handoff.

diff --git a/src/tools/ToolNode.ts b/src/tools/ToolNode.ts
@@ -89,7 +89,26 @@ function isSend(value: unknown): value is Send {
   return value instanceof Send;
 }
 
-/** Merges code execution session context into the sessions map. */
+/**
+ * Merges code execution session context into the sessions map.
+ *
+ * The codeapi worker reports two distinct ids on a code-execution result:
+ *  - `artifact.session_id` (the `sessionId` arg here) is the EXEC session
+ *    — the sandbox VM that ran the code. It's transient and torn down
+ *    post-execution; subsequent calls cannot reuse it as a sandbox.
+ *  - `file.session_id` on each `artifact.files[i]` is the STORAGE
+ *    session — the file-server bucket prefix where the artifact actually
+ *    lives and is served from.
+ *
+ * Per-file `session_id` is preserved (not overwritten with the exec id)
+ * because `_injected_files` are looked up against the file-server's
+ * storage path on subsequent tool calls. Overwriting the storage id with
+ * the exec id makes every follow-up tool call within the same run
+ * silently fail with a 404 — `cat /mnt/data/foo.txt` reports "No such
+ * file or directory" because the worker can't mount a file at a path the
+ * storage doesn't know about. `sessionId` is used only when the per-file
+ * id is `null` or `undefined` (older worker payloads).
+ */
 function updateCodeSession(
   sessions: t.ToolSessionMap,
   sessionId: string,
@@ -104,7 +123,7 @@ function updateCodeSession(
   if (newFiles.length > 0) {
     const filesWithSession: t.FileRefs = newFiles.map((file) => ({
       ...file,
-      session_id: sessionId,
+      session_id: file.session_id ?? sessionId,
     }));
     const newFileNames = new Set(filesWithSession.map((f) => f.name));
     const filteredExisting = existingFiles.filter(