From 73694891ebdf9b0afa3b9f4062b0df139747b2fd Mon Sep 17 00:00:00 2001 From: Marcello Fitton Date: Thu, 21 May 2026 14:20:37 -0700 Subject: [PATCH] parse reasoning_content in agent tool-calling helpers --- .../aibitat/providers/helpers/tooled.js | 49 ++++++++- .../aibitat/providers/helpers/untooled.js | 99 +++++++++++++++++-- 2 files changed, 139 insertions(+), 9 deletions(-) diff --git a/server/utils/agents/aibitat/providers/helpers/tooled.js b/server/utils/agents/aibitat/providers/helpers/tooled.js index 945ee9a2a..cd802c25f 100644 --- a/server/utils/agents/aibitat/providers/helpers/tooled.js +++ b/server/utils/agents/aibitat/providers/helpers/tooled.js @@ -205,6 +205,21 @@ async function tooledStream( const toolCallsByIndex = {}; let usage = null; + let reasoningText = ""; + + // Emit a `` chunk and reset reasoning state. Used whenever we + // transition out of a reasoning stretch (into visible text, into a tool + // call, or at end of stream) so the frontend regex stays balanced. + const closeReasoningIfOpen = () => { + if (reasoningText.length === 0) return; + result.textResponse += ""; + eventHandler?.("reportStreamEvent", { + type: "textResponseChunk", + uuid: msgUUID, + content: "", + }); + reasoningText = ""; + }; for await (const chunk of stream) { // Capture usage from final chunk (some providers send usage after finish_reason) @@ -215,7 +230,26 @@ async function tooledStream( if (!chunk?.choices?.[0]) continue; const choice = chunk.choices[0]; + // Reasoning models (LM Studio, Lemonade, DeepSeek, etc.) emit thinking + // tokens via `delta.reasoning_content`. Wrap them in ... + // so the frontend's ThoughtContainer collapses them into a pane. + const reasoningToken = choice.delta?.reasoning_content; + if (reasoningToken) { + const wrappedChunk = + reasoningText.length === 0 + ? `${reasoningToken}` + : reasoningToken; + reasoningText += reasoningToken; + result.textResponse += wrappedChunk; + eventHandler?.("reportStreamEvent", { + type: "textResponseChunk", + uuid: msgUUID, + content: wrappedChunk, + }); + } + if (choice.delta?.content) { + closeReasoningIfOpen(); result.textResponse += choice.delta.content; eventHandler?.("reportStreamEvent", { type: "textResponseChunk", @@ -225,6 +259,7 @@ async function tooledStream( } if (choice.delta?.tool_calls) { + closeReasoningIfOpen(); for (const toolCall of choice.delta.tool_calls) { const idx = toolCall.index ?? 0; @@ -260,6 +295,10 @@ async function tooledStream( } } + // Defensive close in case the stream ended mid-reasoning (e.g. abort, or a + // provider that emits reasoning but no follow-up content/tool_call). + closeReasoningIfOpen(); + // Auto-record usage if provider is passed and usage is available if (provider?.recordUsage && usage) { try { @@ -371,8 +410,16 @@ async function tooledComplete( }; } + // Wrap any reasoning content in ... so the frontend can + // collapse it into a thought pane, matching the streaming path. + const reasoning = completion.reasoning_content; + const textResponse = + reasoning && reasoning.trim().length > 0 + ? `${reasoning}${completion.content ?? ""}` + : completion.content; + return { - textResponse: completion.content, + textResponse, cost, usage, }; diff --git a/server/utils/agents/aibitat/providers/helpers/untooled.js b/server/utils/agents/aibitat/providers/helpers/untooled.js index 0ecc052c0..5409081f3 100644 --- a/server/utils/agents/aibitat/providers/helpers/untooled.js +++ b/server/utils/agents/aibitat/providers/helpers/untooled.js @@ -190,7 +190,16 @@ ${JSON.stringify(def.parameters.properties, null, 4)}\n`; if (history[history.length - 1].role !== "user") return null; const msgUUID = v4(); + // `textResponse` stays content-only so safeJsonParse below can still + // match a tool-call JSON payload. `displayedResponse` mirrors what was + // shown to the user — reasoning wrapped in ... followed + // by content — and is returned as the text response when no tool call + // is parsed. The live status bubble uses human-readable framing + // ("Thinking:" / "Done thinking.") instead of raw tags because the + // StatusResponse component renders text literally. let textResponse = ""; + let displayedResponse = ""; + let reasoningText = ""; const historyMessages = this.buildToolCallMessages(history, functions); const stream = await chatCb({ messages: historyMessages }); @@ -204,19 +213,54 @@ ${JSON.stringify(def.parameters.properties, null, 4)}\n`; if (!chunk?.choices?.[0]) continue; // Skip if no choices const choice = chunk.choices[0]; - if (choice.delta?.content) { - textResponse += choice.delta.content; + const reasoningToken = choice.delta?.reasoning_content; + if (reasoningToken) { + const liveChunk = + reasoningText.length === 0 + ? `Thinking:\n\n${reasoningToken}` + : reasoningToken; + displayedResponse += + reasoningText.length === 0 + ? `${reasoningToken}` + : reasoningToken; + reasoningText += reasoningToken; eventHandler?.("reportStreamEvent", { type: "statusResponse", uuid: msgUUID, - content: choice.delta.content, + content: liveChunk, + }); + } + + if (choice.delta?.content) { + const closingReasoning = reasoningText.length > 0; + const liveChunk = closingReasoning + ? `\n\nDone thinking.\n\n${choice.delta.content}` + : choice.delta.content; + if (closingReasoning) { + displayedResponse += ``; + reasoningText = ""; + } + textResponse += choice.delta.content; + displayedResponse += choice.delta.content; + eventHandler?.("reportStreamEvent", { + type: "statusResponse", + uuid: msgUUID, + content: liveChunk, }); } } + // Stream ended while still inside a reasoning block (e.g. model + // produced only reasoning then stopped). Close the tag in the + // returned text so the frontend regex stays balanced. + if (reasoningText.length > 0) { + displayedResponse += ``; + reasoningText = ""; + } + const call = safeJsonParse(textResponse, null); if (call === null) - return { toolCall: null, text: textResponse, uuid: msgUUID }; // failed to parse, so must be regular text response. + return { toolCall: null, text: displayedResponse, uuid: msgUUID }; // failed to parse, so must be regular text response. const { valid, reason } = this.validFuncCall(call, functions); if (!valid) { @@ -335,14 +379,43 @@ ${JSON.stringify(def.parameters.properties, null, 4)}\n`; ); const msgUUID = v4(); completion = { content: "" }; + let reasoningText = ""; const stream = await chatCallback({ messages: this.cleanMsgs(messages), }); + const closeReasoningIfOpen = () => { + if (reasoningText.length === 0) return; + completion.content += ""; + eventHandler?.("reportStreamEvent", { + type: "textResponseChunk", + uuid: msgUUID, + content: "", + }); + reasoningText = ""; + }; + for await (const chunk of stream) { if (!chunk?.choices?.[0]) continue; // Skip if no choices const choice = chunk.choices[0]; + + const reasoningToken = choice.delta?.reasoning_content; + if (reasoningToken) { + const wrappedChunk = + reasoningText.length === 0 + ? `${reasoningToken}` + : reasoningToken; + reasoningText += reasoningToken; + completion.content += wrappedChunk; + eventHandler?.("reportStreamEvent", { + type: "textResponseChunk", + uuid: msgUUID, + content: wrappedChunk, + }); + } + if (choice.delta?.content) { + closeReasoningIfOpen(); completion.content += choice.delta.content; eventHandler?.("reportStreamEvent", { type: "textResponseChunk", @@ -351,6 +424,8 @@ ${JSON.stringify(def.parameters.properties, null, 4)}\n`; }); } } + + closeReasoningIfOpen(); } // The UnTooled class inherited Deduplicator is mostly useful to prevent the agent @@ -414,10 +489,18 @@ ${JSON.stringify(def.parameters.properties, null, 4)}\n`; // If the response from the callback is the raw OpenAI Spec response object, we can use that directly. // Otherwise, we will assume the response is just the string output we wanted (see: `#handleFunctionCallChat` which returns the content only) // This handles both streaming and non-streaming completions. - completion = - typeof response === "string" - ? { content: response } - : response.choices?.[0]?.message; + if (typeof response === "string") { + completion = { content: response }; + } else { + const message = response.choices?.[0]?.message ?? {}; + const reasoning = message.reasoning_content; + completion = { + content: + reasoning && reasoning.trim().length > 0 + ? `${reasoning}${message.content ?? ""}` + : message.content, + }; + } } // The UnTooled class inherited Deduplicator is mostly useful to prevent the agent