From 982d36fb968eb375926873a0bc63f46a91cfbc29 Mon Sep 17 00:00:00 2001 From: LearningCircuit <185559241+LearningCircuit@users.noreply.github.com> Date: Sat, 2 May 2026 01:38:24 +0200 Subject: [PATCH] fix(release): bump AI summary timeout + diagnose empty content (#3783) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(release): bump AI summary timeout + diagnose empty content The v1.6.8 release run hit two related failures in the AI TL;DR step: curl: (28) Operation timed out after 120001 milliseconds WARNING: AI response 2xx but no .choices[0].message.content — skipping summary The 120s --max-time was too tight for kimi-k2-thinking (and likely other thinking models) on a multi-PR release prompt. The retry succeeded HTTP-wise but returned a response without any extractable content, so the release shipped without a TL;DR. Three changes: 1. Raise the default --max-time from 120s to 300s (superseded by the follow-up commit below, which raises it to 900s), configurable via vars.AI_RELEASE_SUMMARY_MAX_TIME. Releases never fail because of the AI step (the whole block is best-effort), but giving thinking models five minutes is realistic. 2. Fall back to `.choices[0].message.reasoning_content` when `.content` is empty. Some providers route thinking-model output into the reasoning field. Cheap try-and-fall-back; no harm if the field is missing. 3. When BOTH fields are empty, dump the response shape (top-level keys, message keys, finish_reason, error field) to step logs. Bounded to ~4 lines, but enough to debug the next failure without rerunning. Behavior unchanged when the call succeeds normally. * fix(release): bump AI summary timeout default to 15 min 900s (15 min) covers thinking models on multi-PR release prompts with comfortable headroom. Still configurable via vars.AI_RELEASE_SUMMARY_MAX_TIME. 
--- .github/workflows/release.yml | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 2c15144d5..8c8fff403 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -464,12 +464,16 @@ jobs: # Up to 2 attempts (1 retry) with a short backoff. The model # endpoint can hiccup on cold starts or transient rate-limits; # one retry covers nearly all of those without delaying releases. + # Default --max-time is 900s (15 min) — kimi-k2-thinking and + # other thinking models can spend many minutes reasoning before + # producing output, so the previous 120s ceiling was too + # tight (observed on the v1.6.8 release run). HTTP_CODE="000" for attempt in 1 2; do HTTP_CODE=$(printf '%s' "$JSON_PAYLOAD" | curl -sS \ -o "$AI_RESPONSE_FILE" \ -w '%{http_code}' \ - --max-time 120 \ + --max-time "$AI_MAX_TIME" \ -X POST "https://openrouter.ai/api/v1/chat/completions" \ -H "Content-Type: application/json" \ -H "Authorization: Bearer $OPENROUTER_API_KEY" \ @@ -486,11 +490,27 @@ jobs: done if [[ "$HTTP_CODE" =~ ^2 ]]; then + # Some thinking models on some providers route the final answer + # into `reasoning_content` rather than `content`. Try `content` + # first; fall back to `reasoning_content` if it's empty. 
AI_SUMMARY=$(jq -r '.choices[0].message.content // empty' "$AI_RESPONSE_FILE" 2>/dev/null || true) + if [[ -z "$AI_SUMMARY" ]]; then + AI_SUMMARY=$(jq -r '.choices[0].message.reasoning_content // empty' "$AI_RESPONSE_FILE" 2>/dev/null || true) + if [[ -n "$AI_SUMMARY" ]]; then + echo "NOTE: empty .content; using .reasoning_content from $AI_MODEL" + fi + fi if [[ -n "$AI_SUMMARY" ]]; then echo "AI summary generated (model=$AI_MODEL, ${#AI_SUMMARY} chars)" else - echo "WARNING: AI response 2xx but no .choices[0].message.content — skipping summary" + echo "WARNING: AI response 2xx but neither .content nor .reasoning_content — skipping summary" + # Diagnostic: dump the response shape so the next failure + # can be debugged without rerunning. Bounded to keep noise + # out of logs. + echo " response keys: $(jq -r 'keys | join(", ")' "$AI_RESPONSE_FILE" 2>/dev/null || echo '')" + echo " message keys: $(jq -r '.choices[0].message | keys | join(", ")' "$AI_RESPONSE_FILE" 2>/dev/null || echo '')" + echo " finish_reason: $(jq -r '.choices[0].finish_reason // ""' "$AI_RESPONSE_FILE" 2>/dev/null)" + echo " error field: $(jq -r '.error // empty' "$AI_RESPONSE_FILE" 2>/dev/null)" fi else echo "WARNING: AI summary call failed (HTTP $HTTP_CODE) — skipping summary" @@ -534,6 +554,12 @@ jobs: AI_MODEL: ${{ vars.AI_MODEL || 'moonshotai/kimi-k2-thinking' }} AI_TEMPERATURE: ${{ vars.AI_RELEASE_SUMMARY_TEMPERATURE || '0.3' }} AI_MAX_TOKENS: ${{ vars.AI_RELEASE_SUMMARY_MAX_TOKENS || '4000' }} + # Curl --max-time for the OpenRouter call. Default 900s (15 min) — + # thinking models need substantial headroom on a multi-PR release + # prompt (the previous 120s default timed out on v1.6.8 with + # kimi-k2-thinking). Override via vars.AI_RELEASE_SUMMARY_MAX_TIME + # if you want the AI step to fail faster on stuck calls. + AI_MAX_TIME: ${{ vars.AI_RELEASE_SUMMARY_MAX_TIME || '900' }} SUMMARY_PROMPT: > You are summarizing a software release for the project's GitHub release page. 
Two inputs follow: the hand-written release notes from the repository (may be empty)