From 73694891ebdf9b0afa3b9f4062b0df139747b2fd Mon Sep 17 00:00:00 2001
From: Marcello Fitton <macfittondev@gmail.com>
Date: Thu, 21 May 2026 14:20:37 -0700
Subject: [PATCH] parse reasoning_content in agent tool-calling helpers

---
 .../aibitat/providers/helpers/tooled.js       | 49 ++++++++-
 .../aibitat/providers/helpers/untooled.js     | 99 +++++++++++++++++--
 2 files changed, 139 insertions(+), 9 deletions(-)
diff --git a/server/utils/agents/aibitat/providers/helpers/tooled.js b/server/utils/agents/aibitat/providers/helpers/tooled.js
index 945ee9a2a..cd802c25f 100644
--- a/server/utils/agents/aibitat/providers/helpers/tooled.js
+++ b/server/utils/agents/aibitat/providers/helpers/tooled.js
@@ -205,6 +205,21 @@ async function tooledStream(
 
   const toolCallsByIndex = {};
   let usage = null;
+  let reasoningText = "";
+
+  // Emit a `</think>` chunk and reset reasoning state. Used whenever we
+  // transition out of a reasoning stretch (into visible text, into a tool
+  // call, or at end of stream) so the frontend regex stays balanced.
+  const closeReasoningIfOpen = () => {
+    if (reasoningText.length === 0) return; // no reasoning block is open
+    result.textResponse += "</think>"; // keep the accumulated text balanced
+    eventHandler?.("reportStreamEvent", {
+      type: "textResponseChunk",
+      uuid: msgUUID,
+      content: "</think>", // the same closing tag is streamed to the listener
+    });
+    reasoningText = ""; // empty again: the next reasoning token re-emits <think>
+  };
 
   for await (const chunk of stream) {
     // Capture usage from final chunk (some providers send usage after finish_reason)
@@ -215,7 +230,26 @@ async function tooledStream(
     if (!chunk?.choices?.[0]) continue;
     const choice = chunk.choices[0];
 
+    // Reasoning models (LM Studio, Lemonade, DeepSeek, etc.) emit thinking
+    // tokens via `delta.reasoning_content`. Wrap them in <think>...</think>
+    // so the frontend's ThoughtContainer collapses them into a pane.
+    const reasoningToken = choice.delta?.reasoning_content;
+    if (reasoningToken) {
+      const wrappedChunk =
+        reasoningText.length === 0
+          ? `<think>${reasoningToken}`
+          : reasoningToken;
+      reasoningText += reasoningToken;
+      result.textResponse += wrappedChunk;
+      eventHandler?.("reportStreamEvent", {
+        type: "textResponseChunk",
+        uuid: msgUUID,
+        content: wrappedChunk,
+      });
+    }
+
     if (choice.delta?.content) {
+      closeReasoningIfOpen();
       result.textResponse += choice.delta.content;
       eventHandler?.("reportStreamEvent", {
         type: "textResponseChunk",
@@ -225,6 +259,7 @@ async function tooledStream(
     }
 
     if (choice.delta?.tool_calls) {
+      closeReasoningIfOpen();
       for (const toolCall of choice.delta.tool_calls) {
         const idx = toolCall.index ?? 0;
 
@@ -260,6 +295,10 @@ async function tooledStream(
     }
   }
 
+  // Defensive close in case the stream ended mid-reasoning (e.g. abort, or a
+  // provider that emits reasoning but no follow-up content/tool_call).
+  closeReasoningIfOpen();
+
   // Auto-record usage if provider is passed and usage is available
   if (provider?.recordUsage && usage) {
     try {
@@ -371,8 +410,16 @@ async function tooledComplete(
     };
   }
 
+  // Wrap any reasoning content in <think>...</think> so the frontend can
+  // collapse it into a thought pane, matching the streaming path.
+  const reasoning = completion.reasoning_content;
+  const textResponse =
+    reasoning && reasoning.trim().length > 0
+      ? `<think>${reasoning}</think>${completion.content ?? ""}`
+      : completion.content;
+
   return {
-    textResponse: completion.content,
+    textResponse,
     cost,
     usage,
   };
diff --git a/server/utils/agents/aibitat/providers/helpers/untooled.js b/server/utils/agents/aibitat/providers/helpers/untooled.js
index 0ecc052c0..5409081f3 100644
--- a/server/utils/agents/aibitat/providers/helpers/untooled.js
+++ b/server/utils/agents/aibitat/providers/helpers/untooled.js
@@ -190,7 +190,16 @@ ${JSON.stringify(def.parameters.properties, null, 4)}\n`;
     if (history[history.length - 1].role !== "user") return null;
 
     const msgUUID = v4();
+    // `textResponse` stays content-only so safeJsonParse below can still
+    // match a tool-call JSON payload. `displayedResponse` also keeps the
+    // reasoning, wrapped in <think>...</think> and followed by the
+    // content, and is returned as the text response when no tool call
+    // is parsed. The live status bubble uses human-readable framing
+    // ("Thinking:" / "Done thinking.") instead of raw tags because the
+    // StatusResponse component renders text literally.
     let textResponse = "";
+    let displayedResponse = "";
+    let reasoningText = "";
     const historyMessages = this.buildToolCallMessages(history, functions);
     const stream = await chatCb({ messages: historyMessages });
 
@@ -204,19 +213,54 @@ ${JSON.stringify(def.parameters.properties, null, 4)}\n`;
       if (!chunk?.choices?.[0]) continue; // Skip if no choices
       const choice = chunk.choices[0];
 
-      if (choice.delta?.content) {
-        textResponse += choice.delta.content;
+      const reasoningToken = choice.delta?.reasoning_content;
+      if (reasoningToken) {
+        const liveChunk =
+          reasoningText.length === 0
+            ? `Thinking:\n\n${reasoningToken}`
+            : reasoningToken;
+        displayedResponse +=
+          reasoningText.length === 0
+            ? `<think>${reasoningToken}`
+            : reasoningToken;
+        reasoningText += reasoningToken;
         eventHandler?.("reportStreamEvent", {
           type: "statusResponse",
           uuid: msgUUID,
-          content: choice.delta.content,
+          content: liveChunk,
+        });
+      }
+
+      if (choice.delta?.content) {
+        const closingReasoning = reasoningText.length > 0;
+        const liveChunk = closingReasoning
+          ? `\n\nDone thinking.\n\n${choice.delta.content}`
+          : choice.delta.content;
+        if (closingReasoning) {
+          displayedResponse += `</think>`;
+          reasoningText = "";
+        }
+        textResponse += choice.delta.content;
+        displayedResponse += choice.delta.content;
+        eventHandler?.("reportStreamEvent", {
+          type: "statusResponse",
+          uuid: msgUUID,
+          content: liveChunk,
         });
       }
     }
 
+    // Stream ended while still inside a reasoning block (e.g. model
+    // produced only reasoning then stopped). Close the tag in the
+    // returned text so the frontend regex stays balanced.
+    if (reasoningText.length > 0) {
+      displayedResponse += `</think>`;
+      reasoningText = "";
+    }
+
     const call = safeJsonParse(textResponse, null);
     if (call === null)
-      return { toolCall: null, text: textResponse, uuid: msgUUID }; // failed to parse, so must be regular text response.
+      return { toolCall: null, text: displayedResponse, uuid: msgUUID }; // failed to parse, so must be regular text response.
 
     const { valid, reason } = this.validFuncCall(call, functions);
     if (!valid) {
@@ -335,14 +379,43 @@ ${JSON.stringify(def.parameters.properties, null, 4)}\n`;
         );
         const msgUUID = v4();
         completion = { content: "" };
+        let reasoningText = "";
         const stream = await chatCallback({
           messages: this.cleanMsgs(messages),
         });
 
+        const closeReasoningIfOpen = () => { // appends and streams `</think>` if a reasoning block is open
+          if (reasoningText.length === 0) return; // nothing to close
+          completion.content += "</think>"; // keep the accumulated content balanced
+          eventHandler?.("reportStreamEvent", {
+            type: "textResponseChunk",
+            uuid: msgUUID,
+            content: "</think>", // the same closing tag is streamed to the listener
+          });
+          reasoningText = ""; // empty again: the next reasoning token re-emits <think>
+        };
+
         for await (const chunk of stream) {
           if (!chunk?.choices?.[0]) continue; // Skip if no choices
           const choice = chunk.choices[0];
+
+          const reasoningToken = choice.delta?.reasoning_content;
+          if (reasoningToken) {
+            const wrappedChunk =
+              reasoningText.length === 0
+                ? `<think>${reasoningToken}`
+                : reasoningToken;
+            reasoningText += reasoningToken;
+            completion.content += wrappedChunk;
+            eventHandler?.("reportStreamEvent", {
+              type: "textResponseChunk",
+              uuid: msgUUID,
+              content: wrappedChunk,
+            });
+          }
+
           if (choice.delta?.content) {
+            closeReasoningIfOpen();
             completion.content += choice.delta.content;
             eventHandler?.("reportStreamEvent", {
               type: "textResponseChunk",
@@ -351,6 +424,8 @@ ${JSON.stringify(def.parameters.properties, null, 4)}\n`;
             });
           }
         }
+
+        closeReasoningIfOpen();
       }
 
       // The UnTooled class inherited Deduplicator is mostly useful to prevent the agent
@@ -414,10 +489,18 @@ ${JSON.stringify(def.parameters.properties, null, 4)}\n`;
         // If the response from the callback is the raw OpenAI Spec response object, we can use that directly.
         // Otherwise, we will assume the response is just the string output we wanted (see: `#handleFunctionCallChat` which returns the content only)
         // This handles both streaming and non-streaming completions.
-        completion =
-          typeof response === "string"
-            ? { content: response }
-            : response.choices?.[0]?.message;
+        if (typeof response === "string") {
+          completion = { content: response };
+        } else {
+          const message = response.choices?.[0]?.message ?? {};
+          const reasoning = message.reasoning_content;
+          completion = {
+            content:
+              reasoning && reasoning.trim().length > 0
+                ? `<think>${reasoning}</think>${message.content ?? ""}`
+                : message.content,
+          };
+        }
       }
 
       // The UnTooled class inherited Deduplicator is mostly useful to prevent the agent