diff --git a/.github/workflows/ldr-research-reusable.yml b/.github/workflows/ldr-research-reusable.yml index c730a715b..1cda1f249 100644 --- a/.github/workflows/ldr-research-reusable.yml +++ b/.github/workflows/ldr-research-reusable.yml @@ -181,10 +181,12 @@ jobs: set +e if [ "$ITERATIONS" -gt 0 ] 2>/dev/null; then pdm run python scripts/ldr-research.py --iterations "$ITERATIONS" \ - < query.txt > response.json + < query.txt 2> >(tee stderr.log >&2) > response.json else - pdm run python scripts/ldr-research.py < query.txt > response.json + pdm run python scripts/ldr-research.py \ + < query.txt 2> >(tee stderr.log >&2) > response.json fi + LDR_EXIT_CODE=$? set -e echo "=== Response (first 2000 chars): ===" @@ -192,9 +194,24 @@ jobs: echo "" echo "=== End response ===" - # Validate JSON shape - if ! jq . response.json > /dev/null 2>&1; then - echo "::error::Response is not valid JSON" + # Catches SIGABRT/native crashes where stdout never flushed; JSON check alone can't see this. + if [ "$LDR_EXIT_CODE" -ne 0 ]; then + LAST_ERR=$(tail -c 500 stderr.log 2>/dev/null || echo "") + echo "::error::ldr-research.py exited with code $LDR_EXIT_CODE: $LAST_ERR" + echo "success=false" >> "$GITHUB_OUTPUT" + exit 1 + fi + + # `jq .` exits 0 on a zero-byte file, so guard explicitly. + if [ ! -s response.json ]; then + echo "::error::response.json is empty" + echo "success=false" >> "$GITHUB_OUTPUT" + exit 1 + fi + + # Shape validation: must be a JSON object. + if ! jq -e 'type == "object"' response.json > /dev/null 2>&1; then + echo "::error::Response is not a JSON object" echo "success=false" >> "$GITHUB_OUTPUT" exit 1 fi @@ -206,6 +223,14 @@ jobs: exit 1 fi + # Empty .research would still produce a hollow downstream comment. + RESEARCH=$(jq -r '.research // empty' response.json) + if [ -z "$RESEARCH" ]; then + echo "::error::Response missing or empty .research field" + echo "success=false" >> "$GITHUB_OUTPUT" + exit 1 + fi + echo "success=true" >> "$GITHUB_OUTPUT" echo "✅ Research completed" @@ -263,10 +288,13 @@ jobs: echo "Final comment.md size: $(wc -c < comment.md) bytes" - name: Upload research artifact + if: always() uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: name: ${{ steps.artifact-name.outputs.name }} path: | comment.md response.json + stderr.log + if-no-files-found: ignore retention-days: 7 diff --git a/scripts/ldr-research.py b/scripts/ldr-research.py index a25759c72..151931c3d 100755 --- a/scripts/ldr-research.py +++ b/scripts/ldr-research.py @@ -30,10 +30,14 @@ Note: This uses the programmatic API and does NOT require a running LDR server. """ import argparse +import faulthandler import json import os import sys +# Dump a Python traceback to stderr on SIGABRT/SIGSEGV/SIGFPE/SIGBUS/SIGILL. +faulthandler.enable() + def make_serializable(obj): """Convert objects to JSON-serializable format.""" @@ -96,91 +100,100 @@ def parse_args(): def main(): - args = parse_args() - - # Read query from stdin - query = sys.stdin.read().strip() - if not query: - print(json.dumps({"error": "No query provided on stdin"})) - sys.exit(1) - - # Default model for OpenRouter if not specified - model_name = args.model - if not model_name and args.provider == "openrouter": - model_name = "google/gemini-2.0-flash-001" - - # Check required API keys - if args.provider == "openrouter" and not os.environ.get( - "OPENROUTER_API_KEY" - ): - print(json.dumps({"error": "OPENROUTER_API_KEY not set"})) - sys.exit(1) - - if args.search_tool == "serper" and not os.environ.get("SERPER_API_KEY"): - print(json.dumps({"error": "SERPER_API_KEY not set"})) - sys.exit(1) - + # Flush in finally: SIGABRT during interpreter shutdown won't drain the stdout buffer. try: - from local_deep_research.api import quick_summary - from local_deep_research.api.settings_utils import ( - create_settings_snapshot, - ) + args = parse_args() - # Build settings overrides - overrides = { - "search.tool": args.search_tool, - "llm.provider": args.provider, - } - if model_name: - overrides["llm.model"] = model_name + # Read query from stdin + query = sys.stdin.read().strip() + if not query: + print(json.dumps({"error": "No query provided on stdin"})) + sys.exit(1) - # Add API keys from environment - if os.environ.get("OPENROUTER_API_KEY"): - overrides["llm.openrouter.api_key"] = os.environ[ - "OPENROUTER_API_KEY" - ] - if os.environ.get("SERPER_API_KEY"): - overrides["search.engine.web.serper.api_key"] = os.environ[ - "SERPER_API_KEY" - ] + # Default model for OpenRouter if not specified + model_name = args.model + if not model_name and args.provider == "openrouter": + model_name = "google/gemini-2.0-flash-001" - settings = create_settings_snapshot(overrides=overrides) + # Check required API keys + if args.provider == "openrouter" and not os.environ.get( + "OPENROUTER_API_KEY" + ): + print(json.dumps({"error": "OPENROUTER_API_KEY not set"})) + sys.exit(1) - # Build kwargs - kwargs = { - "query": query, - "provider": args.provider, - "search_tool": args.search_tool, - "settings_snapshot": settings, - "programmatic_mode": True, - "search_strategy": args.strategy, - } - if model_name: - kwargs["model_name"] = model_name - if args.iterations is not None: - kwargs["iterations"] = args.iterations + if args.search_tool == "serper" and not os.environ.get( + "SERPER_API_KEY" + ): + print(json.dumps({"error": "SERPER_API_KEY not set"})) + sys.exit(1) - result = quick_summary(**kwargs) + try: + from local_deep_research.api import quick_summary + from local_deep_research.api.settings_utils import ( + create_settings_snapshot, + ) - # Use formatted_findings if available (already properly formatted with sources) - # Fall back to summary if not - research_output = result.get("formatted_findings") or result.get( - "summary", str(result) - ) + # Build settings overrides + overrides = { + "search.tool": args.search_tool, + "llm.provider": args.provider, + } + if model_name: + overrides["llm.model"] = model_name - # Build output - make sure everything is JSON serializable - output = { - "research": research_output, - "sources": make_serializable(result.get("sources", [])), - "findings": make_serializable(result.get("findings", [])), - "iterations": result.get("iterations"), - } + # Add API keys from environment + if os.environ.get("OPENROUTER_API_KEY"): + overrides["llm.openrouter.api_key"] = os.environ[ + "OPENROUTER_API_KEY" + ] + if os.environ.get("SERPER_API_KEY"): + overrides["search.engine.web.serper.api_key"] = os.environ[ + "SERPER_API_KEY" + ] - print(json.dumps(output)) + settings = create_settings_snapshot(overrides=overrides) - except Exception as e: - print(json.dumps({"error": str(e)})) - sys.exit(1) + # Build kwargs + kwargs = { + "query": query, + "provider": args.provider, + "search_tool": args.search_tool, + "settings_snapshot": settings, + "programmatic_mode": True, + "search_strategy": args.strategy, + } + if model_name: + kwargs["model_name"] = model_name + if args.iterations is not None: + kwargs["iterations"] = args.iterations + + result = quick_summary(**kwargs) + + # Use formatted_findings if available (already properly formatted with sources) + # Fall back to summary if not + research_output = result.get("formatted_findings") or result.get( + "summary", str(result) + ) + + # Build output - make sure everything is JSON serializable + output = { + "research": research_output, + "sources": make_serializable(result.get("sources", [])), + "findings": make_serializable(result.get("findings", [])), + "iterations": result.get("iterations"), + } + + print(json.dumps(output)) + + except Exception as e: + print(json.dumps({"error": str(e)})) + sys.exit(1) + finally: + try: + sys.stdout.flush() + except Exception: # noqa: silent-exception + pass if __name__ == "__main__":