From 982d36fb968eb375926873a0bc63f46a91cfbc29 Mon Sep 17 00:00:00 2001
From: LearningCircuit <185559241+LearningCircuit@users.noreply.github.com>
Date: Sat, 2 May 2026 01:38:24 +0200
Subject: [PATCH] fix(release): bump AI summary timeout + diagnose empty
 content (#3783)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix(release): bump AI summary timeout + diagnose empty content

The v1.6.8 release run hit two related failures in the AI TL;DR step:

  curl: (28) Operation timed out after 120001 milliseconds
  WARNING: AI response 2xx but no .choices[0].message.content
           — skipping summary

The 120s --max-time was too tight for kimi-k2-thinking (and likely
other thinking models) on a multi-PR release prompt. The retry
succeeded HTTP-wise but returned a response without any extractable
content, so the release shipped without a TL;DR.

Three changes:

1. Raise the default --max-time from 120s to 300s, configurable via
   vars.AI_RELEASE_SUMMARY_MAX_TIME. Releases never fail because of
   the AI step (the whole block is best-effort), but giving thinking
   models five minutes is realistic.

2. Fall back to `.choices[0].message.reasoning_content` when
   `.content` is empty. Some providers route thinking-model output
   into the reasoning field. Cheap try-and-fall-back; no harm if the
   field is missing.

3. When BOTH fields are empty, dump the response shape (top-level
   keys, message keys, finish_reason, error field) to step logs.
   Bounded to ~4 lines, but enough to debug the next failure
   without rerunning.

Behavior unchanged when the call succeeds normally.

* fix(release): bump AI summary timeout default to 15 min

900s (15 min) covers thinking models on multi-PR release prompts
with comfortable headroom. Still configurable via
vars.AI_RELEASE_SUMMARY_MAX_TIME.
---
 .github/workflows/release.yml | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 2c15144d5..8c8fff403 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -464,12 +464,16 @@ jobs:
             # Up to 2 attempts (1 retry) with a short backoff. The model
             # endpoint can hiccup on cold starts or transient rate-limits;
             # one retry covers nearly all of those without delaying releases.
+            # Default --max-time is 900s (15 min) per attempt, so two attempts
+            # can take up to ~30 min worst case. kimi-k2-thinking and other
+            # thinking models can reason for many minutes before producing
+            # output; the previous 120s ceiling timed out on the v1.6.8 run.
             HTTP_CODE="000"
             for attempt in 1 2; do
               HTTP_CODE=$(printf '%s' "$JSON_PAYLOAD" | curl -sS \
                 -o "$AI_RESPONSE_FILE" \
                 -w '%{http_code}' \
-                --max-time 120 \
+                --max-time "$AI_MAX_TIME" \
                 -X POST "https://openrouter.ai/api/v1/chat/completions" \
                 -H "Content-Type: application/json" \
                 -H "Authorization: Bearer $OPENROUTER_API_KEY" \
@@ -486,11 +490,27 @@ jobs:
             done
 
             if [[ "$HTTP_CODE" =~ ^2 ]]; then
+              # Some thinking models on some providers route the final answer
+              # into `reasoning_content` rather than `content`. Try `content`
+              # first; fall back to `reasoning_content` if it's empty.
               AI_SUMMARY=$(jq -r '.choices[0].message.content // empty' "$AI_RESPONSE_FILE" 2>/dev/null || true)
+              if [[ -z "$AI_SUMMARY" ]]; then
+                AI_SUMMARY=$(jq -r '.choices[0].message.reasoning_content // empty' "$AI_RESPONSE_FILE" 2>/dev/null || true)
+                if [[ -n "$AI_SUMMARY" ]]; then
+                  echo "NOTE: empty .content; using .reasoning_content from $AI_MODEL"
+                fi
+              fi
               if [[ -n "$AI_SUMMARY" ]]; then
                 echo "AI summary generated (model=$AI_MODEL, ${#AI_SUMMARY} chars)"
               else
-                echo "WARNING: AI response 2xx but no .choices[0].message.content — skipping summary"
+                echo "WARNING: AI response 2xx but neither .content nor .reasoning_content — skipping summary"
+                # Diagnostic: dump the response shape so the next failure
+                # can be debugged without rerunning. One log line per probe:
+                # jq -c keeps an object-valued .error on a single line.
+                echo "  response keys: $(jq -r 'keys | join(", ")' "$AI_RESPONSE_FILE" 2>/dev/null || echo '<unparseable>')"
+                echo "  message keys:  $(jq -r '.choices[0].message | keys | join(", ")' "$AI_RESPONSE_FILE" 2>/dev/null || echo '<missing>')"
+                echo "  finish_reason: $(jq -r '.choices[0].finish_reason // "<none>"' "$AI_RESPONSE_FILE" 2>/dev/null || echo '<unparseable>')"
+                echo "  error field:   $(jq -rc '.error // "<none>"' "$AI_RESPONSE_FILE" 2>/dev/null || echo '<unparseable>')"
               fi
             else
               echo "WARNING: AI summary call failed (HTTP $HTTP_CODE) — skipping summary"
@@ -534,6 +554,12 @@ jobs:
           AI_MODEL: ${{ vars.AI_MODEL || 'moonshotai/kimi-k2-thinking' }}
           AI_TEMPERATURE: ${{ vars.AI_RELEASE_SUMMARY_TEMPERATURE || '0.3' }}
           AI_MAX_TOKENS: ${{ vars.AI_RELEASE_SUMMARY_MAX_TOKENS || '4000' }}
+          # Curl --max-time for the OpenRouter call. Default 900s (15 min) —
+          # thinking models need substantial headroom on a multi-PR release
+          # prompt (the previous 120s default timed out on v1.6.8 with
+          # kimi-k2-thinking). Override via vars.AI_RELEASE_SUMMARY_MAX_TIME
+          # if you want the AI step to fail faster on stuck calls.
+          AI_MAX_TIME: ${{ vars.AI_RELEASE_SUMMARY_MAX_TIME || '900' }}
           SUMMARY_PROMPT: >
             You are summarizing a software release for the project's GitHub release page.
             Two inputs follow: the hand-written release notes from the repository (may be empty)