mirror of
https://github.com/LearningCircuit/local-deep-research.git
synced 2026-06-15 19:46:56 +03:00
fix(ci): make LDR research workflow honestly fail on Python crash (#4226)
* fix(ci): make LDR research workflow honestly fail on Python crash

  A real run (job 77511717371, PR #4225) crashed with glibc 'double free or corruption (!prev)' but the workflow reported success and the caller posted a hollow PR comment. Two cooperating defects: the script's exit code was discarded inside set +e / set -e, and `jq .` exits 0 on a zero-byte response.json so the JSON-shape check passed on empty input.

  Capture the exit code, harden the validation order (exit -> non-empty -> jq -e shape -> .error -> .research non-empty), tee stderr to a log surfaced in the ::error:: annotation, upload the artifact with if: always() so failed runs leave debuggable evidence, and flush stdout in a finally block in the script so a SIGABRT during interpreter shutdown after json.dumps can't drop the otherwise-completed output.

  Matches the house pattern from dockle.yml and the jq -e idiom from release-gate.yml.

* chore(ci): enable faulthandler in ldr-research.py

  Dumps a Python traceback to stderr on SIGABRT/SIGSEGV/SIGFPE/SIGBUS/SIGILL before the signal re-fires. Pairs with the stderr-capture plumbing earlier in this PR: on the next glibc abort the ::error:: annotation will show "Fatal Python error: Aborted" plus the actual Python stack frame, making the deps-level investigation possible without a re-run.

  Verified locally: a deliberately aborted child process emits its frame through faulthandler before exiting on signal 6.
This commit is contained in:
38
.github/workflows/ldr-research-reusable.yml
vendored
38
.github/workflows/ldr-research-reusable.yml
vendored
@@ -181,10 +181,12 @@ jobs:
|
||||
set +e
|
||||
if [ "$ITERATIONS" -gt 0 ] 2>/dev/null; then
|
||||
pdm run python scripts/ldr-research.py --iterations "$ITERATIONS" \
|
||||
< query.txt > response.json
|
||||
< query.txt 2> >(tee stderr.log >&2) > response.json
|
||||
else
|
||||
pdm run python scripts/ldr-research.py < query.txt > response.json
|
||||
pdm run python scripts/ldr-research.py \
|
||||
< query.txt 2> >(tee stderr.log >&2) > response.json
|
||||
fi
|
||||
LDR_EXIT_CODE=$?
|
||||
set -e
|
||||
|
||||
echo "=== Response (first 2000 chars): ==="
|
||||
@@ -192,9 +194,24 @@ jobs:
|
||||
echo ""
|
||||
echo "=== End response ==="
|
||||
|
||||
# Validate JSON shape
|
||||
if ! jq . response.json > /dev/null 2>&1; then
|
||||
echo "::error::Response is not valid JSON"
|
||||
# Catches SIGABRT/native crashes where stdout never flushed; JSON check alone can't see this.
|
||||
if [ "$LDR_EXIT_CODE" -ne 0 ]; then
|
||||
LAST_ERR=$(tail -c 500 stderr.log 2>/dev/null || echo "")
|
||||
echo "::error::ldr-research.py exited with code $LDR_EXIT_CODE: $LAST_ERR"
|
||||
echo "success=false" >> "$GITHUB_OUTPUT"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# `jq .` exits 0 on a zero-byte file, so guard explicitly.
|
||||
if [ ! -s response.json ]; then
|
||||
echo "::error::response.json is empty"
|
||||
echo "success=false" >> "$GITHUB_OUTPUT"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Shape validation: must be a JSON object.
|
||||
if ! jq -e 'type == "object"' response.json > /dev/null 2>&1; then
|
||||
echo "::error::Response is not a JSON object"
|
||||
echo "success=false" >> "$GITHUB_OUTPUT"
|
||||
exit 1
|
||||
fi
|
||||
@@ -206,6 +223,14 @@ jobs:
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Empty .research would still produce a hollow downstream comment.
|
||||
RESEARCH=$(jq -r '.research // empty' response.json)
|
||||
if [ -z "$RESEARCH" ]; then
|
||||
echo "::error::Response missing or empty .research field"
|
||||
echo "success=false" >> "$GITHUB_OUTPUT"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "success=true" >> "$GITHUB_OUTPUT"
|
||||
echo "✅ Research completed"
|
||||
|
||||
@@ -263,10 +288,13 @@ jobs:
|
||||
echo "Final comment.md size: $(wc -c < comment.md) bytes"
|
||||
|
||||
- name: Upload research artifact
|
||||
if: always()
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
|
||||
with:
|
||||
name: ${{ steps.artifact-name.outputs.name }}
|
||||
path: |
|
||||
comment.md
|
||||
response.json
|
||||
stderr.log
|
||||
if-no-files-found: ignore
|
||||
retention-days: 7
|
||||
|
||||
@@ -30,10 +30,14 @@ Note: This uses the programmatic API and does NOT require a running LDR server.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import faulthandler
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Dump a Python traceback to stderr on SIGABRT/SIGSEGV/SIGFPE/SIGBUS/SIGILL.
|
||||
faulthandler.enable()
|
||||
|
||||
|
||||
def make_serializable(obj):
|
||||
"""Convert objects to JSON-serializable format."""
|
||||
@@ -96,91 +100,100 @@ def parse_args():
|
||||
|
||||
|
||||
def main():
|
||||
args = parse_args()
|
||||
|
||||
# Read query from stdin
|
||||
query = sys.stdin.read().strip()
|
||||
if not query:
|
||||
print(json.dumps({"error": "No query provided on stdin"}))
|
||||
sys.exit(1)
|
||||
|
||||
# Default model for OpenRouter if not specified
|
||||
model_name = args.model
|
||||
if not model_name and args.provider == "openrouter":
|
||||
model_name = "google/gemini-2.0-flash-001"
|
||||
|
||||
# Check required API keys
|
||||
if args.provider == "openrouter" and not os.environ.get(
|
||||
"OPENROUTER_API_KEY"
|
||||
):
|
||||
print(json.dumps({"error": "OPENROUTER_API_KEY not set"}))
|
||||
sys.exit(1)
|
||||
|
||||
if args.search_tool == "serper" and not os.environ.get("SERPER_API_KEY"):
|
||||
print(json.dumps({"error": "SERPER_API_KEY not set"}))
|
||||
sys.exit(1)
|
||||
|
||||
# Flush in finally: SIGABRT during interpreter shutdown won't drain the stdout buffer.
|
||||
try:
|
||||
from local_deep_research.api import quick_summary
|
||||
from local_deep_research.api.settings_utils import (
|
||||
create_settings_snapshot,
|
||||
)
|
||||
args = parse_args()
|
||||
|
||||
# Build settings overrides
|
||||
overrides = {
|
||||
"search.tool": args.search_tool,
|
||||
"llm.provider": args.provider,
|
||||
}
|
||||
if model_name:
|
||||
overrides["llm.model"] = model_name
|
||||
# Read query from stdin
|
||||
query = sys.stdin.read().strip()
|
||||
if not query:
|
||||
print(json.dumps({"error": "No query provided on stdin"}))
|
||||
sys.exit(1)
|
||||
|
||||
# Add API keys from environment
|
||||
if os.environ.get("OPENROUTER_API_KEY"):
|
||||
overrides["llm.openrouter.api_key"] = os.environ[
|
||||
"OPENROUTER_API_KEY"
|
||||
]
|
||||
if os.environ.get("SERPER_API_KEY"):
|
||||
overrides["search.engine.web.serper.api_key"] = os.environ[
|
||||
"SERPER_API_KEY"
|
||||
]
|
||||
# Default model for OpenRouter if not specified
|
||||
model_name = args.model
|
||||
if not model_name and args.provider == "openrouter":
|
||||
model_name = "google/gemini-2.0-flash-001"
|
||||
|
||||
settings = create_settings_snapshot(overrides=overrides)
|
||||
# Check required API keys
|
||||
if args.provider == "openrouter" and not os.environ.get(
|
||||
"OPENROUTER_API_KEY"
|
||||
):
|
||||
print(json.dumps({"error": "OPENROUTER_API_KEY not set"}))
|
||||
sys.exit(1)
|
||||
|
||||
# Build kwargs
|
||||
kwargs = {
|
||||
"query": query,
|
||||
"provider": args.provider,
|
||||
"search_tool": args.search_tool,
|
||||
"settings_snapshot": settings,
|
||||
"programmatic_mode": True,
|
||||
"search_strategy": args.strategy,
|
||||
}
|
||||
if model_name:
|
||||
kwargs["model_name"] = model_name
|
||||
if args.iterations is not None:
|
||||
kwargs["iterations"] = args.iterations
|
||||
if args.search_tool == "serper" and not os.environ.get(
|
||||
"SERPER_API_KEY"
|
||||
):
|
||||
print(json.dumps({"error": "SERPER_API_KEY not set"}))
|
||||
sys.exit(1)
|
||||
|
||||
result = quick_summary(**kwargs)
|
||||
try:
|
||||
from local_deep_research.api import quick_summary
|
||||
from local_deep_research.api.settings_utils import (
|
||||
create_settings_snapshot,
|
||||
)
|
||||
|
||||
# Use formatted_findings if available (already properly formatted with sources)
|
||||
# Fall back to summary if not
|
||||
research_output = result.get("formatted_findings") or result.get(
|
||||
"summary", str(result)
|
||||
)
|
||||
# Build settings overrides
|
||||
overrides = {
|
||||
"search.tool": args.search_tool,
|
||||
"llm.provider": args.provider,
|
||||
}
|
||||
if model_name:
|
||||
overrides["llm.model"] = model_name
|
||||
|
||||
# Build output - make sure everything is JSON serializable
|
||||
output = {
|
||||
"research": research_output,
|
||||
"sources": make_serializable(result.get("sources", [])),
|
||||
"findings": make_serializable(result.get("findings", [])),
|
||||
"iterations": result.get("iterations"),
|
||||
}
|
||||
# Add API keys from environment
|
||||
if os.environ.get("OPENROUTER_API_KEY"):
|
||||
overrides["llm.openrouter.api_key"] = os.environ[
|
||||
"OPENROUTER_API_KEY"
|
||||
]
|
||||
if os.environ.get("SERPER_API_KEY"):
|
||||
overrides["search.engine.web.serper.api_key"] = os.environ[
|
||||
"SERPER_API_KEY"
|
||||
]
|
||||
|
||||
print(json.dumps(output))
|
||||
settings = create_settings_snapshot(overrides=overrides)
|
||||
|
||||
except Exception as e:
|
||||
print(json.dumps({"error": str(e)}))
|
||||
sys.exit(1)
|
||||
# Build kwargs
|
||||
kwargs = {
|
||||
"query": query,
|
||||
"provider": args.provider,
|
||||
"search_tool": args.search_tool,
|
||||
"settings_snapshot": settings,
|
||||
"programmatic_mode": True,
|
||||
"search_strategy": args.strategy,
|
||||
}
|
||||
if model_name:
|
||||
kwargs["model_name"] = model_name
|
||||
if args.iterations is not None:
|
||||
kwargs["iterations"] = args.iterations
|
||||
|
||||
result = quick_summary(**kwargs)
|
||||
|
||||
# Use formatted_findings if available (already properly formatted with sources)
|
||||
# Fall back to summary if not
|
||||
research_output = result.get("formatted_findings") or result.get(
|
||||
"summary", str(result)
|
||||
)
|
||||
|
||||
# Build output - make sure everything is JSON serializable
|
||||
output = {
|
||||
"research": research_output,
|
||||
"sources": make_serializable(result.get("sources", [])),
|
||||
"findings": make_serializable(result.get("findings", [])),
|
||||
"iterations": result.get("iterations"),
|
||||
}
|
||||
|
||||
print(json.dumps(output))
|
||||
|
||||
except Exception as e:
|
||||
print(json.dumps({"error": str(e)}))
|
||||
sys.exit(1)
|
||||
finally:
|
||||
try:
|
||||
sys.stdout.flush()
|
||||
except Exception: # noqa: silent-exception
|
||||
pass
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user