parse reasoning_content in agent tool-calling helpers

2026-06-15 23:20:32 +03:00 · 2026-05-21 14:20:37 -07:00
parent 03fe24a687
commit 73694891eb
2 changed files with 139 additions and 9 deletions
--- a/server/utils/agents/aibitat/providers/helpers/tooled.js
+++ b/server/utils/agents/aibitat/providers/helpers/tooled.js
@@ -205,6 +205,21 @@ async function tooledStream(

  const toolCallsByIndex = {};
  let usage = null;
+  let reasoningText = "";
+
+  // Emit a `</think>` chunk and reset reasoning state. Used whenever we
+  // transition out of a reasoning stretch (into visible text, into a tool
+  // call, or at end of stream) so the frontend regex stays balanced.
+  const closeReasoningIfOpen = () => {
+    if (reasoningText.length > 0) {
+      const content = "</think>";
+      const type = "textResponseChunk";
+      result.textResponse += content;
+      // Report the closing tag as a regular text chunk of this message.
+      eventHandler?.("reportStreamEvent", { type, uuid: msgUUID, content });
+      reasoningText = "";
+    }
+  };

  for await (const chunk of stream) {
    // Capture usage from final chunk (some providers send usage after finish_reason)
@@ -215,7 +230,26 @@ async function tooledStream(
    if (!chunk?.choices?.[0]) continue;
    const choice = chunk.choices[0];

+    // Reasoning models (LM Studio, Lemonade, DeepSeek, etc.) emit thinking
+    // tokens via `delta.reasoning_content`. Wrap them in <think>...</think>
+    // so the frontend's ThoughtContainer collapses them into a pane.
+    const reasoningToken = choice.delta?.reasoning_content;
+    if (reasoningToken) {
+      const wrappedChunk =
+        reasoningText.length === 0
+          ? `<think>${reasoningToken}`
+          : reasoningToken;
+      reasoningText += reasoningToken;
+      result.textResponse += wrappedChunk;
+      eventHandler?.("reportStreamEvent", {
+        type: "textResponseChunk",
+        uuid: msgUUID,
+        content: wrappedChunk,
+      });
+    }
+
    if (choice.delta?.content) {
+      closeReasoningIfOpen();
      result.textResponse += choice.delta.content;
      eventHandler?.("reportStreamEvent", {
        type: "textResponseChunk",
@@ -225,6 +259,7 @@ async function tooledStream(
    }

    if (choice.delta?.tool_calls) {
+      closeReasoningIfOpen();
      for (const toolCall of choice.delta.tool_calls) {
        const idx = toolCall.index ?? 0;

@@ -260,6 +295,10 @@ async function tooledStream(
    }
  }

+  // Defensive close in case the stream ended mid-reasoning (e.g. abort, or a
+  // provider that emits reasoning but no follow-up content/tool_call).
+  closeReasoningIfOpen();
+
  // Auto-record usage if provider is passed and usage is available
  if (provider?.recordUsage && usage) {
    try {
@@ -371,8 +410,16 @@ async function tooledComplete(
    };
  }

+  // Wrap any reasoning content in <think>...</think> so the frontend can
+  // collapse it into a thought pane, matching the streaming path.
+  const reasoning = completion.reasoning_content;
+  const textResponse =
+    reasoning && reasoning.trim().length > 0
+      ? `<think>${reasoning}</think>${completion.content ?? ""}`
+      : completion.content;
+
  return {
-    textResponse: completion.content,
+    textResponse,
    cost,
    usage,
  };
--- a/server/utils/agents/aibitat/providers/helpers/untooled.js
+++ b/server/utils/agents/aibitat/providers/helpers/untooled.js
@@ -190,7 +190,16 @@ ${JSON.stringify(def.parameters.properties, null, 4)}\n`;
    if (history[history.length - 1].role !== "user") return null;

    const msgUUID = v4();
+    // `textResponse` stays content-only so safeJsonParse below can still
+    // match a tool-call JSON payload. `displayedResponse` mirrors what was
+    // shown to the user — reasoning wrapped in <think>...</think> followed
+    // by content — and is returned as the text response when no tool call
+    // is parsed. The live status bubble uses human-readable framing
+    // ("Thinking:" / "Done thinking.") instead of raw tags because the
+    // StatusResponse component renders text literally.
    let textResponse = "";
+    let displayedResponse = "";
+    let reasoningText = "";
    const historyMessages = this.buildToolCallMessages(history, functions);
    const stream = await chatCb({ messages: historyMessages });

@@ -204,19 +213,54 @@ ${JSON.stringify(def.parameters.properties, null, 4)}\n`;
      if (!chunk?.choices?.[0]) continue; // Skip if no choices
      const choice = chunk.choices[0];

-      if (choice.delta?.content) {
-        textResponse += choice.delta.content;
+      const reasoningToken = choice.delta?.reasoning_content;
+      if (reasoningToken) {
+        const liveChunk =
+          reasoningText.length === 0
+            ? `Thinking:\n\n${reasoningToken}`
+            : reasoningToken;
+        displayedResponse +=
+          reasoningText.length === 0
+            ? `<think>${reasoningToken}`
+            : reasoningToken;
+        reasoningText += reasoningToken;
        eventHandler?.("reportStreamEvent", {
          type: "statusResponse",
          uuid: msgUUID,
-          content: choice.delta.content,
+          content: liveChunk,
+        });
+      }
+
+      if (choice.delta?.content) {
+        const closingReasoning = reasoningText.length > 0;
+        const liveChunk = closingReasoning
+          ? `\n\nDone thinking.\n\n${choice.delta.content}`
+          : choice.delta.content;
+        if (closingReasoning) {
+          displayedResponse += `</think>`;
+          reasoningText = "";
+        }
+        textResponse += choice.delta.content;
+        displayedResponse += choice.delta.content;
+        eventHandler?.("reportStreamEvent", {
+          type: "statusResponse",
+          uuid: msgUUID,
+          content: liveChunk,
        });
      }
    }

+    // Stream ended while still inside a reasoning block (e.g. model
+    // produced only reasoning then stopped). Close the tag in the
+    // returned text so the frontend regex stays balanced.
+    if (reasoningText.length > 0) {
+      displayedResponse += `</think>`;
+      reasoningText = "";
+    }
+
    const call = safeJsonParse(textResponse, null);
    if (call === null)
-      return { toolCall: null, text: textResponse, uuid: msgUUID }; // failed to parse, so must be regular text response.
+      return { toolCall: null, text: displayedResponse, uuid: msgUUID }; // failed to parse, so must be regular text response.

    const { valid, reason } = this.validFuncCall(call, functions);
    if (!valid) {
@@ -335,14 +379,43 @@ ${JSON.stringify(def.parameters.properties, null, 4)}\n`;
        );
        const msgUUID = v4();
        completion = { content: "" };
+        let reasoningText = "";
        const stream = await chatCallback({
          messages: this.cleanMsgs(messages),
        });

+        const closeReasoningIfOpen = () => {
+          // An empty buffer means no <think> tag was opened; nothing to do.
+          if (!reasoningText) return;
+          const closingTag = "</think>";
+          completion.content += closingTag;
+          const type = "textResponseChunk";
+          const event = { type, uuid: msgUUID, content: closingTag };
+          eventHandler?.("reportStreamEvent", event);
+          reasoningText = "";
+        };
+
        for await (const chunk of stream) {
          if (!chunk?.choices?.[0]) continue; // Skip if no choices
          const choice = chunk.choices[0];
+
+          const reasoningToken = choice.delta?.reasoning_content;
+          if (reasoningToken) {
+            const wrappedChunk =
+              reasoningText.length === 0
+                ? `<think>${reasoningToken}`
+                : reasoningToken;
+            reasoningText += reasoningToken;
+            completion.content += wrappedChunk;
+            eventHandler?.("reportStreamEvent", {
+              type: "textResponseChunk",
+              uuid: msgUUID,
+              content: wrappedChunk,
+            });
+          }
+
          if (choice.delta?.content) {
+            closeReasoningIfOpen();
            completion.content += choice.delta.content;
            eventHandler?.("reportStreamEvent", {
              type: "textResponseChunk",
@@ -351,6 +424,8 @@ ${JSON.stringify(def.parameters.properties, null, 4)}\n`;
            });
          }
        }
+
+        closeReasoningIfOpen();
      }

      // The UnTooled class inherited Deduplicator is mostly useful to prevent the agent
@@ -414,10 +489,18 @@ ${JSON.stringify(def.parameters.properties, null, 4)}\n`;
        // If the response from the callback is the raw OpenAI Spec response object, we can use that directly.
        // Otherwise, we will assume the response is just the string output we wanted (see: `#handleFunctionCallChat` which returns the content only)
        // This handles both streaming and non-streaming completions.
-        completion =
-          typeof response === "string"
-            ? { content: response }
-            : response.choices?.[0]?.message;
+        if (typeof response === "string") {
+          completion = { content: response };
+        } else {
+          const message = response.choices?.[0]?.message ?? {};
+          const reasoning = message.reasoning_content;
+          completion = {
+            content:
+              reasoning && reasoning.trim().length > 0
+                ? `<think>${reasoning}</think>${message.content ?? ""}`
+                : message.content,
+          };
+        }
      }

      // The UnTooled class inherited Deduplicator is mostly useful to prevent the agent