parse reasoning_content in agent tool-calling helpers

This commit is contained in:
Marcello Fitton
2026-05-21 14:20:37 -07:00
parent 03fe24a687
commit 73694891eb
2 changed files with 139 additions and 9 deletions

View File

@@ -205,6 +205,21 @@ async function tooledStream(
const toolCallsByIndex = {};
let usage = null;
let reasoningText = "";
// Balance the `<think>` wrapper. When a reasoning stretch is currently open
// (the reasoning buffer is non-empty), append `</think>` to the accumulated
// response, stream the same tag to the client, and clear the buffer. Does
// nothing when no reasoning is open. Called before visible text, before tool
// calls, and once after the stream so the frontend regex stays balanced.
const closeReasoningIfOpen = () => {
  const isReasoningOpen = reasoningText.length > 0;
  if (isReasoningOpen) {
    const closingTag = "</think>";
    result.textResponse += closingTag;
    eventHandler?.("reportStreamEvent", {
      type: "textResponseChunk",
      uuid: msgUUID,
      content: closingTag,
    });
    // An empty buffer marks the reasoning stretch as closed.
    reasoningText = "";
  }
};
for await (const chunk of stream) {
// Capture usage from final chunk (some providers send usage after finish_reason)
@@ -215,7 +230,26 @@ async function tooledStream(
if (!chunk?.choices?.[0]) continue;
const choice = chunk.choices[0];
// Reasoning models (LM Studio, Lemonade, DeepSeek, etc.) emit thinking
// tokens via `delta.reasoning_content`. Wrap them in <think>...</think>
// so the frontend's ThoughtContainer collapses them into a pane.
const reasoningToken = choice.delta?.reasoning_content;
if (reasoningToken) {
const wrappedChunk =
reasoningText.length === 0
? `<think>${reasoningToken}`
: reasoningToken;
reasoningText += reasoningToken;
result.textResponse += wrappedChunk;
eventHandler?.("reportStreamEvent", {
type: "textResponseChunk",
uuid: msgUUID,
content: wrappedChunk,
});
}
if (choice.delta?.content) {
closeReasoningIfOpen();
result.textResponse += choice.delta.content;
eventHandler?.("reportStreamEvent", {
type: "textResponseChunk",
@@ -225,6 +259,7 @@ async function tooledStream(
}
if (choice.delta?.tool_calls) {
closeReasoningIfOpen();
for (const toolCall of choice.delta.tool_calls) {
const idx = toolCall.index ?? 0;
@@ -260,6 +295,10 @@ async function tooledStream(
}
}
// Defensive close in case the stream ended mid-reasoning (e.g. abort, or a
// provider that emits reasoning but no follow-up content/tool_call).
closeReasoningIfOpen();
// Auto-record usage if provider is passed and usage is available
if (provider?.recordUsage && usage) {
try {
@@ -371,8 +410,16 @@ async function tooledComplete(
};
}
// Wrap any reasoning content in <think>...</think> so the frontend can
// collapse it into a thought pane, matching the streaming path.
const reasoning = completion.reasoning_content;
const textResponse =
reasoning && reasoning.trim().length > 0
? `<think>${reasoning}</think>${completion.content ?? ""}`
: completion.content;
return {
textResponse: completion.content,
textResponse,
cost,
usage,
};

View File

@@ -190,7 +190,16 @@ ${JSON.stringify(def.parameters.properties, null, 4)}\n`;
if (history[history.length - 1].role !== "user") return null;
const msgUUID = v4();
// `textResponse` stays content-only so safeJsonParse below can still
// match a tool-call JSON payload. `displayedResponse` mirrors what was
// shown to the user — reasoning wrapped in <think>...</think> followed
// by content — and is returned as the text response when no tool call
// is parsed. The live status bubble uses human-readable framing
// ("Thinking:" / "Done thinking.") instead of raw tags because the
// StatusResponse component renders text literally.
let textResponse = "";
let displayedResponse = "";
let reasoningText = "";
const historyMessages = this.buildToolCallMessages(history, functions);
const stream = await chatCb({ messages: historyMessages });
@@ -204,19 +213,54 @@ ${JSON.stringify(def.parameters.properties, null, 4)}\n`;
if (!chunk?.choices?.[0]) continue; // Skip if no choices
const choice = chunk.choices[0];
if (choice.delta?.content) {
textResponse += choice.delta.content;
const reasoningToken = choice.delta?.reasoning_content;
if (reasoningToken) {
const liveChunk =
reasoningText.length === 0
? `Thinking:\n\n${reasoningToken}`
: reasoningToken;
displayedResponse +=
reasoningText.length === 0
? `<think>${reasoningToken}`
: reasoningToken;
reasoningText += reasoningToken;
eventHandler?.("reportStreamEvent", {
type: "statusResponse",
uuid: msgUUID,
content: choice.delta.content,
content: liveChunk,
});
}
if (choice.delta?.content) {
const closingReasoning = reasoningText.length > 0;
const liveChunk = closingReasoning
? `\n\nDone thinking.\n\n${choice.delta.content}`
: choice.delta.content;
if (closingReasoning) {
displayedResponse += `</think>`;
reasoningText = "";
}
textResponse += choice.delta.content;
displayedResponse += choice.delta.content;
eventHandler?.("reportStreamEvent", {
type: "statusResponse",
uuid: msgUUID,
content: liveChunk,
});
}
}
// Stream ended while still inside a reasoning block (e.g. model
// produced only reasoning then stopped). Close the tag in the
// returned text so the frontend regex stays balanced.
if (reasoningText.length > 0) {
displayedResponse += `</think>`;
reasoningText = "";
}
const call = safeJsonParse(textResponse, null);
if (call === null)
return { toolCall: null, text: textResponse, uuid: msgUUID }; // failed to parse, so must be regular text response.
return { toolCall: null, text: displayedResponse, uuid: msgUUID }; // failed to parse, so must be regular text response.
const { valid, reason } = this.validFuncCall(call, functions);
if (!valid) {
@@ -335,14 +379,43 @@ ${JSON.stringify(def.parameters.properties, null, 4)}\n`;
);
const msgUUID = v4();
completion = { content: "" };
let reasoningText = "";
const stream = await chatCallback({
messages: this.cleanMsgs(messages),
});
// Close an open reasoning stretch: when the reasoning buffer is non-empty,
// append `</think>` to the accumulated completion content, stream the same
// tag to the client, and clear the buffer. Does nothing otherwise.
const closeReasoningIfOpen = () => {
  const isReasoningOpen = reasoningText.length > 0;
  if (isReasoningOpen) {
    const closingTag = "</think>";
    completion.content += closingTag;
    eventHandler?.("reportStreamEvent", {
      type: "textResponseChunk",
      uuid: msgUUID,
      content: closingTag,
    });
    // An empty buffer marks the reasoning stretch as closed.
    reasoningText = "";
  }
};
for await (const chunk of stream) {
if (!chunk?.choices?.[0]) continue; // Skip if no choices
const choice = chunk.choices[0];
const reasoningToken = choice.delta?.reasoning_content;
if (reasoningToken) {
const wrappedChunk =
reasoningText.length === 0
? `<think>${reasoningToken}`
: reasoningToken;
reasoningText += reasoningToken;
completion.content += wrappedChunk;
eventHandler?.("reportStreamEvent", {
type: "textResponseChunk",
uuid: msgUUID,
content: wrappedChunk,
});
}
if (choice.delta?.content) {
closeReasoningIfOpen();
completion.content += choice.delta.content;
eventHandler?.("reportStreamEvent", {
type: "textResponseChunk",
@@ -351,6 +424,8 @@ ${JSON.stringify(def.parameters.properties, null, 4)}\n`;
});
}
}
closeReasoningIfOpen();
}
// The UnTooled class inherited Deduplicator is mostly useful to prevent the agent
@@ -414,10 +489,18 @@ ${JSON.stringify(def.parameters.properties, null, 4)}\n`;
// If the response from the callback is the raw OpenAI Spec response object, we read its first choice's message and,
// when that message carries non-blank `reasoning_content`, prefix the content with the reasoning wrapped in <think>...</think>.
// Otherwise, we will assume the response is just the string output we wanted (see: `#handleFunctionCallChat` which returns the content only)
// This handles both streaming and non-streaming completions.
completion =
typeof response === "string"
? { content: response }
: response.choices?.[0]?.message;
if (typeof response === "string") {
completion = { content: response };
} else {
const message = response.choices?.[0]?.message ?? {};
const reasoning = message.reasoning_content;
completion = {
content:
reasoning && reasoning.trim().length > 0
? `<think>${reasoning}</think>${message.content ?? ""}`
: message.content,
};
}
}
// The UnTooled class inherited Deduplicator is mostly useful to prevent the agent