Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@librechat/agents",
"version": "3.1.72",
"version": "3.1.73",
"main": "./dist/cjs/main.cjs",
"module": "./dist/esm/main.mjs",
"types": "./dist/types/index.d.ts",
Expand Down
13 changes: 12 additions & 1 deletion src/graphs/Graph.ts
Original file line number Diff line number Diff line change
Expand Up @@ -898,10 +898,21 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
if (
isThinkingEnabled(agentContext.provider, agentContext.clientOptions)
) {
/**
* Pass `this.startIndex` so the function can distinguish CURRENT-run
* AI messages (the agent's own iterations — possibly without a
* leading thinking block, which Claude is allowed to skip) from
* historical context that genuinely needs the
* `[Previous agent context]` placeholder. Without this signal the
* function would convert the agent's own in-run tool_use messages,
* polluting the next iteration's prompt with a placeholder the
* model treats as suspicious injected content.
*/
finalMessages = ensureThinkingBlockInMessages(
finalMessages,
agentContext.provider,
config
config,
this.startIndex
);
}

Expand Down
167 changes: 167 additions & 0 deletions src/messages/ensureThinkingBlock.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1209,4 +1209,171 @@ describe('ensureThinkingBlockInMessages', () => {
expect(outputImageBlock).not.toBe(originalImageBlock);
});
});

describe('runStartIndex (current-run boundary)', () => {
  /**
   * Background: Claude may emit a tool_use without a preceding thinking
   * block (cf. PR #116). If the agent's very first iteration yields such
   * an `AI(tool_use, no thinking)` message, converting it into a
   * `[Previous agent context]` HumanMessage would inject text into the
   * next iteration's prompt that the model regards as suspicious. The
   * model then distrusts its own genuine tool result and re-runs the
   * tool, frequently failing because the later sandbox lacks the file.
   *
   * `runStartIndex` marks where the agent's own in-run messages begin:
   * anything at or beyond that index must NEVER be converted, whether
   * or not a thinking block exists anywhere in the chain.
   */

  /** Text of the placeholder that conversion inserts. */
  const placeholder = '[Previous agent context]';

  /** AI message holding a single tool call and no thinking block. */
  const aiToolUse = (id: string, name: string, content = '') =>
    new AIMessage({
      content,
      tool_calls: [{ id, name, args: {}, type: 'tool_call' as const }],
    });

  /** Tool result answering the tool call identified by `tool_call_id`. */
  const toolReply = (tool_call_id: string, content: string) =>
    new ToolMessage({ content, tool_call_id });

  /** Plain user turn. */
  const human = (content: string) => new HumanMessage({ content });

  test('preserves the agent first-iteration AI(tool_use) when its index is at runStartIndex', () => {
    // The AI message carries no thinking block — a valid skip before tool_use.
    const history = [
      human('fetch the data'),
      aiToolUse('c1', 'fetch'),
      toolReply('c1', 'data'),
    ];

    const output = ensureThinkingBlockInMessages(
      history,
      Providers.BEDROCK,
      undefined,
      /* runStartIndex */ 1
    );

    // Nothing is dropped or merged: index 1 belongs to the current run.
    expect(output).toHaveLength(3);
    const [, aiTurn, toolTurn] = output;
    expect(aiTurn).toBeInstanceOf(AIMessage);
    expect((aiTurn as AIMessage).tool_calls).toHaveLength(1);
    expect(toolTurn).toBeInstanceOf(ToolMessage);
    // The placeholder text must not appear.
    expect(getTextContent(aiTurn)).not.toContain(placeholder);
  });

  test('preserves multiple in-run AI(tool_use) iterations without thinking blocks', () => {
    const history = [
      human('do work'),
      aiToolUse('c1', 'step1'),
      toolReply('c1', 'r1'),
      aiToolUse('c2', 'step2'),
      toolReply('c2', 'r2'),
    ];

    const output = ensureThinkingBlockInMessages(
      history,
      Providers.BEDROCK,
      undefined,
      /* runStartIndex */ 1
    );

    expect(output).toHaveLength(5);
    const aiPositions = [1, 3];
    for (const position of aiPositions) {
      expect(output[position]).toBeInstanceOf(AIMessage);
    }
    // Both AI turns survive unconverted.
    for (const position of aiPositions) {
      expect(getTextContent(output[position])).not.toContain(placeholder);
    }
  });

  test('still converts pre-runStartIndex history that lacks thinking blocks', () => {
    // Handoff scenario: an earlier non-thinking agent made tool calls
    // before this run began, so they legitimately need the placeholder
    // (the original purpose of this function). The current run would
    // start at index 3, i.e. past the end of this history.
    const history = [
      human('first request'),
      aiToolUse('old', 'legacy', 'using tool'),
      toolReply('old', 'old result'),
    ];

    const output = ensureThinkingBlockInMessages(
      history,
      Providers.BEDROCK,
      undefined,
      /* runStartIndex */ 3
    );

    // The historical AI(tool_use)+Tool pair collapses into one placeholder.
    expect(output).toHaveLength(2);
    const [userTurn, converted] = output;
    expect(userTurn).toBeInstanceOf(HumanMessage);
    expect(converted).toBeInstanceOf(HumanMessage);
    expect(getTextContent(converted)).toContain(placeholder);
  });

  test('falls back to chainHasThinkingBlock heuristic when runStartIndex is undefined (backward compat)', () => {
    // With neither reasoning nor a boundary hint, the message is still
    // converted — callers that do not pass the boundary keep the prior
    // behavior.
    const history = [
      human('do work'),
      aiToolUse('c1', 'tool', 'using tool'),
      toolReply('c1', 'r'),
    ];

    const output = ensureThinkingBlockInMessages(history, Providers.BEDROCK);

    expect(output).toHaveLength(2);
    const converted = output[1];
    expect(converted).toBeInstanceOf(HumanMessage);
    expect(getTextContent(converted)).toContain(placeholder);
  });

  test('runStartIndex of 0 is honored (whole array is the current run)', () => {
    // Edge case: a brand-new run with no earlier history. Every message
    // is in-run and must survive even without thinking blocks.
    const history = [
      human('do work'),
      aiToolUse('c1', 'tool'),
      toolReply('c1', 'r'),
    ];

    const output = ensureThinkingBlockInMessages(
      history,
      Providers.BEDROCK,
      undefined,
      /* runStartIndex */ 0
    );

    expect(output).toHaveLength(3);
    const aiTurn = output[1];
    expect(aiTurn).toBeInstanceOf(AIMessage);
    expect(getTextContent(aiTurn)).not.toContain(placeholder);
  });
});
});
30 changes: 29 additions & 1 deletion src/messages/format.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1391,12 +1391,23 @@ function appendToolCalls(
* @param messages - Array of messages to process
* @param _provider - The provider being used (currently unused; kept for future compatibility)
* @param config - Optional RunnableConfig for structured agent logging
* @param runStartIndex - Index in `messages` where the CURRENT run's own
* appended AI/Tool messages begin (i.e. anything at this index or later
* was just produced by this run's own iterations, not historical
* context). When provided, AI messages at or after this index are
* never converted to `[Previous agent context]` placeholders — Claude
* can validly skip a thinking block before a tool_use (cf. PR #116),
* so the agent's own in-run iterations must not be misclassified as
* foreign history. Without the signal the function falls back to its
* prior heuristic (`chainHasThinkingBlock`), preserving backward
* compatibility for callers that don't yet pass the boundary.
* @returns The messages array with tool sequences converted to buffer strings if necessary
*/
export function ensureThinkingBlockInMessages(
messages: BaseMessage[],
_provider: Providers,
config?: RunnableConfig
config?: RunnableConfig,
runStartIndex?: number
): BaseMessage[] {
if (messages.length === 0) {
return messages;
Expand Down Expand Up @@ -1483,6 +1494,23 @@ export function ensureThinkingBlockInMessages(
// but follow-ups have content: "" with only tool_calls. These are the
// same agent's turn and must NOT be converted to HumanMessages.
if (hasToolUse && !hasThinkingBlock) {
// Current-run boundary check: anything at or after `runStartIndex`
// is the current run's own work — preserve it. Claude is allowed
// to skip a thinking block before a tool_use (cf. PR #116 in the
// agents repo), so the agent's own first-iteration AI message can
// legitimately have tool_calls without reasoning. Converting it to
// a `[Previous agent context]` placeholder pollutes the next
// iteration's prompt — the LLM sees the placeholder, treats it as
// suspicious injected content, ignores its own real prior tool
// result, and re-runs the tool to verify (which then often fails
// because subsequent calls land in fresh sandboxes without the
// file). Skip the conversion when we know this is in-run.
if (runStartIndex !== undefined && i >= runStartIndex) {
result.push(msg);
i++;
continue;
}

// Walk backwards — if an earlier AI message in the same chain (before
// the nearest HumanMessage) has a thinking/reasoning block, this is a
// continuation of a thinking-enabled turn, not a non-thinking handoff.
Expand Down
23 changes: 21 additions & 2 deletions src/tools/ToolNode.ts
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,26 @@ function isSend(value: unknown): value is Send {
return value instanceof Send;
}

/** Merges code execution session context into the sessions map. */
/**
* Merges code execution session context into the sessions map.
*
* The codeapi worker reports two distinct ids on a code-execution result:
* - `artifact.session_id` (the `sessionId` arg here) is the EXEC session
* — the sandbox VM that ran the code. It's transient and torn down
* post-execution; subsequent calls cannot reuse it as a sandbox.
* - `file.session_id` on each `artifact.files[i]` is the STORAGE
* session — the file-server bucket prefix where the artifact actually
* lives and is served from.
*
* Per-file `session_id` is preserved (not overwritten with the exec id)
* because `_injected_files` are looked up against the file-server's
* storage path on subsequent tool calls. Stomping the storage id with
* the exec id silently 404s every follow-up tool call within the same
* run — `cat /mnt/data/foo.txt` reports "No such file or directory"
* because the worker can't mount a file at a path the storage doesn't
* know about. Fall back to `sessionId` only when the per-file id is
* absent (older worker payloads).
*/
function updateCodeSession(
sessions: t.ToolSessionMap,
sessionId: string,
Expand All @@ -104,7 +123,7 @@ function updateCodeSession(
if (newFiles.length > 0) {
const filesWithSession: t.FileRefs = newFiles.map((file) => ({
...file,
session_id: sessionId,
session_id: file.session_id ?? sessionId,
}));
const newFileNames = new Set(filesWithSession.map((f) => f.name));
const filteredExisting = existingFiles.filter(
Expand Down
Loading
Loading