mirror of
https://github.com/LearningCircuit/local-deep-research.git
synced 2026-06-15 19:46:56 +03:00
fix(ci): make LDR research workflow honestly fail on Python crash (#4226)
* fix(ci): make LDR research workflow honestly fail on Python crash A real run (job 77511717371, PR #4225) crashed with glibc 'double free or corruption (!prev)' but the workflow reported success and the caller posted a hollow PR comment. Two cooperating defects: the script's exit code was discarded inside set +e / set -e, and `jq .` exits 0 on a zero-byte response.json so the JSON-shape check passed on empty input. Capture the exit code, harden the validation order (exit -> non-empty -> jq -e shape -> .error -> .research non-empty), tee stderr to a log surfaced in the ::error:: annotation, upload the artifact with if: always() so failed runs leave debuggable evidence, and flush stdout in a finally block in the script so a SIGABRT during interpreter shutdown after json.dumps can't drop the otherwise-completed output. Matches the house pattern from dockle.yml and the jq -e idiom from release-gate.yml. * chore(ci): enable faulthandler in ldr-research.py Dumps a Python traceback to stderr on SIGABRT/SIGSEGV/SIGFPE/SIGBUS/SIGILL before the signal re-fires. Pairs with the stderr-capture plumbing earlier in this PR: on the next glibc abort the ::error:: annotation will show "Fatal Python error: Aborted" plus the actual Python stack frame, making the deps-level investigation possible without a re-run. Verified locally: a deliberately aborted child process emits its frame through faulthandler before exiting on signal 6.
This commit is contained in:
@@ -30,10 +30,14 @@ Note: This uses the programmatic API and does NOT require a running LDR server.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import faulthandler
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Dump a Python traceback to stderr on SIGABRT/SIGSEGV/SIGFPE/SIGBUS/SIGILL.
|
||||
faulthandler.enable()
|
||||
|
||||
|
||||
def make_serializable(obj):
|
||||
"""Convert objects to JSON-serializable format."""
|
||||
@@ -96,91 +100,100 @@ def parse_args():
|
||||
|
||||
|
||||
def main():
|
||||
args = parse_args()
|
||||
|
||||
# Read query from stdin
|
||||
query = sys.stdin.read().strip()
|
||||
if not query:
|
||||
print(json.dumps({"error": "No query provided on stdin"}))
|
||||
sys.exit(1)
|
||||
|
||||
# Default model for OpenRouter if not specified
|
||||
model_name = args.model
|
||||
if not model_name and args.provider == "openrouter":
|
||||
model_name = "google/gemini-2.0-flash-001"
|
||||
|
||||
# Check required API keys
|
||||
if args.provider == "openrouter" and not os.environ.get(
|
||||
"OPENROUTER_API_KEY"
|
||||
):
|
||||
print(json.dumps({"error": "OPENROUTER_API_KEY not set"}))
|
||||
sys.exit(1)
|
||||
|
||||
if args.search_tool == "serper" and not os.environ.get("SERPER_API_KEY"):
|
||||
print(json.dumps({"error": "SERPER_API_KEY not set"}))
|
||||
sys.exit(1)
|
||||
|
||||
# Flush in finally: SIGABRT during interpreter shutdown won't drain the stdout buffer.
|
||||
try:
|
||||
from local_deep_research.api import quick_summary
|
||||
from local_deep_research.api.settings_utils import (
|
||||
create_settings_snapshot,
|
||||
)
|
||||
args = parse_args()
|
||||
|
||||
# Build settings overrides
|
||||
overrides = {
|
||||
"search.tool": args.search_tool,
|
||||
"llm.provider": args.provider,
|
||||
}
|
||||
if model_name:
|
||||
overrides["llm.model"] = model_name
|
||||
# Read query from stdin
|
||||
query = sys.stdin.read().strip()
|
||||
if not query:
|
||||
print(json.dumps({"error": "No query provided on stdin"}))
|
||||
sys.exit(1)
|
||||
|
||||
# Add API keys from environment
|
||||
if os.environ.get("OPENROUTER_API_KEY"):
|
||||
overrides["llm.openrouter.api_key"] = os.environ[
|
||||
"OPENROUTER_API_KEY"
|
||||
]
|
||||
if os.environ.get("SERPER_API_KEY"):
|
||||
overrides["search.engine.web.serper.api_key"] = os.environ[
|
||||
"SERPER_API_KEY"
|
||||
]
|
||||
# Default model for OpenRouter if not specified
|
||||
model_name = args.model
|
||||
if not model_name and args.provider == "openrouter":
|
||||
model_name = "google/gemini-2.0-flash-001"
|
||||
|
||||
settings = create_settings_snapshot(overrides=overrides)
|
||||
# Check required API keys
|
||||
if args.provider == "openrouter" and not os.environ.get(
|
||||
"OPENROUTER_API_KEY"
|
||||
):
|
||||
print(json.dumps({"error": "OPENROUTER_API_KEY not set"}))
|
||||
sys.exit(1)
|
||||
|
||||
# Build kwargs
|
||||
kwargs = {
|
||||
"query": query,
|
||||
"provider": args.provider,
|
||||
"search_tool": args.search_tool,
|
||||
"settings_snapshot": settings,
|
||||
"programmatic_mode": True,
|
||||
"search_strategy": args.strategy,
|
||||
}
|
||||
if model_name:
|
||||
kwargs["model_name"] = model_name
|
||||
if args.iterations is not None:
|
||||
kwargs["iterations"] = args.iterations
|
||||
if args.search_tool == "serper" and not os.environ.get(
|
||||
"SERPER_API_KEY"
|
||||
):
|
||||
print(json.dumps({"error": "SERPER_API_KEY not set"}))
|
||||
sys.exit(1)
|
||||
|
||||
result = quick_summary(**kwargs)
|
||||
try:
|
||||
from local_deep_research.api import quick_summary
|
||||
from local_deep_research.api.settings_utils import (
|
||||
create_settings_snapshot,
|
||||
)
|
||||
|
||||
# Use formatted_findings if available (already properly formatted with sources)
|
||||
# Fall back to summary if not
|
||||
research_output = result.get("formatted_findings") or result.get(
|
||||
"summary", str(result)
|
||||
)
|
||||
# Build settings overrides
|
||||
overrides = {
|
||||
"search.tool": args.search_tool,
|
||||
"llm.provider": args.provider,
|
||||
}
|
||||
if model_name:
|
||||
overrides["llm.model"] = model_name
|
||||
|
||||
# Build output - make sure everything is JSON serializable
|
||||
output = {
|
||||
"research": research_output,
|
||||
"sources": make_serializable(result.get("sources", [])),
|
||||
"findings": make_serializable(result.get("findings", [])),
|
||||
"iterations": result.get("iterations"),
|
||||
}
|
||||
# Add API keys from environment
|
||||
if os.environ.get("OPENROUTER_API_KEY"):
|
||||
overrides["llm.openrouter.api_key"] = os.environ[
|
||||
"OPENROUTER_API_KEY"
|
||||
]
|
||||
if os.environ.get("SERPER_API_KEY"):
|
||||
overrides["search.engine.web.serper.api_key"] = os.environ[
|
||||
"SERPER_API_KEY"
|
||||
]
|
||||
|
||||
print(json.dumps(output))
|
||||
settings = create_settings_snapshot(overrides=overrides)
|
||||
|
||||
except Exception as e:
|
||||
print(json.dumps({"error": str(e)}))
|
||||
sys.exit(1)
|
||||
# Build kwargs
|
||||
kwargs = {
|
||||
"query": query,
|
||||
"provider": args.provider,
|
||||
"search_tool": args.search_tool,
|
||||
"settings_snapshot": settings,
|
||||
"programmatic_mode": True,
|
||||
"search_strategy": args.strategy,
|
||||
}
|
||||
if model_name:
|
||||
kwargs["model_name"] = model_name
|
||||
if args.iterations is not None:
|
||||
kwargs["iterations"] = args.iterations
|
||||
|
||||
result = quick_summary(**kwargs)
|
||||
|
||||
# Use formatted_findings if available (already properly formatted with sources)
|
||||
# Fall back to summary if not
|
||||
research_output = result.get("formatted_findings") or result.get(
|
||||
"summary", str(result)
|
||||
)
|
||||
|
||||
# Build output - make sure everything is JSON serializable
|
||||
output = {
|
||||
"research": research_output,
|
||||
"sources": make_serializable(result.get("sources", [])),
|
||||
"findings": make_serializable(result.get("findings", [])),
|
||||
"iterations": result.get("iterations"),
|
||||
}
|
||||
|
||||
print(json.dumps(output))
|
||||
|
||||
except Exception as e:
|
||||
print(json.dumps({"error": str(e)}))
|
||||
sys.exit(1)
|
||||
finally:
|
||||
try:
|
||||
sys.stdout.flush()
|
||||
except Exception: # noqa: silent-exception
|
||||
pass
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user