Files
local-deep-research/scripts/ldr-diff-research.py
LearningCircuit c43941dda0 fix: use correct Serper API key settings path in E2E script
The script was setting the API key at 'search.serper.api_key' instead of
the correct path 'search.engine.web.serper.api_key'. This caused the
search engine factory to fail finding the key, falling back to other
engines instead of using Google search via Serper.
2025-12-03 00:24:41 +01:00

229 lines
7.2 KiB
Python
Executable File

#!/usr/bin/env python3
"""
LDR Diff Research Script
Reads a code diff from stdin and uses Local Deep Research to find
relevant documentation, best practices, and known issues.
Usage:
# Diff mode (default) - research a code diff
cat diff.txt | python scripts/ldr-diff-research.py
git diff HEAD~1 | python scripts/ldr-diff-research.py
# Static mode - run a fixed query for regression testing
python scripts/ldr-diff-research.py --mode static
# With CLI arguments (easier local testing)
python scripts/ldr-diff-research.py --provider openrouter --model gpt-4o < diff.txt
Output: JSON with research results, sources, and findings.
Environment variables (can be overridden by CLI args):
OPENROUTER_API_KEY - API key for OpenRouter
SERPER_API_KEY - API key for Serper.dev search
LDR_PROVIDER - LLM provider (default: openrouter)
LDR_SEARCH_TOOL - Search tool (default: serper)
LDR_MODEL - Model name (default: google/gemini-2.0-flash-001 for openrouter)
LDR_ITERATIONS - Research iterations (default: 1)
MAX_DIFF_SIZE - Max diff size in bytes (default: 8000)
Note: This uses the programmatic API and does NOT require a running LDR server.
"""
import argparse
import json
import os
import sys
# Default static query for regression testing
DEFAULT_STATIC_QUERY = "What is Local Deep Research and how does it work?"
def make_serializable(obj):
"""Convert objects to JSON-serializable format."""
if obj is None:
return None
if isinstance(obj, (str, int, float, bool)):
return obj
if isinstance(obj, dict):
return {k: make_serializable(v) for k, v in obj.items()}
if isinstance(obj, (list, tuple)):
return [make_serializable(item) for item in obj]
# Handle LangChain Document objects
if hasattr(obj, "page_content") and hasattr(obj, "metadata"):
return {
"content": obj.page_content,
"metadata": make_serializable(obj.metadata),
}
# Handle other objects with __dict__
if hasattr(obj, "__dict__"):
return make_serializable(obj.__dict__)
# Fallback to string representation
return str(obj)
def parse_args():
parser = argparse.ArgumentParser(
description="Run LDR research on code diffs or static queries"
)
parser.add_argument(
"--mode",
choices=["diff", "static"],
default=os.environ.get("LDR_MODE", "diff"),
help="Research mode: 'diff' for PR diffs, 'static' for fixed query",
)
parser.add_argument(
"--provider",
default=os.environ.get("LDR_PROVIDER", "openrouter"),
help="LLM provider (default: openrouter)",
)
parser.add_argument(
"--search-tool",
default=os.environ.get("LDR_SEARCH_TOOL", "serper"),
help="Search tool (default: serper)",
)
parser.add_argument(
"--model",
default=os.environ.get("LDR_MODEL"),
help="Model name (default: provider's default)",
)
parser.add_argument(
"--iterations",
type=int,
default=int(os.environ.get("LDR_ITERATIONS", "1")),
help="Number of research iterations (default: 1)",
)
parser.add_argument(
"--max-diff-size",
type=int,
default=int(os.environ.get("MAX_DIFF_SIZE", "8000")),
help="Max diff size in bytes (default: 8000)",
)
parser.add_argument(
"--static-query",
default=os.environ.get("LDR_STATIC_QUERY", DEFAULT_STATIC_QUERY),
help=f"Query for static mode (default: '{DEFAULT_STATIC_QUERY}')",
)
return parser.parse_args()
def main():
args = parse_args()
# Determine query based on mode
if args.mode == "static":
query = args.static_query
diff = None
else:
# Read diff from stdin
diff = sys.stdin.read()
if not diff.strip():
print(
json.dumps(
{
"error": "No diff provided (use --mode static for fixed query)"
}
)
)
sys.exit(1)
# Truncate if too large
if len(diff) > args.max_diff_size:
diff = diff[: args.max_diff_size] + "\n... (truncated)"
query = f"""Based on these code changes, research relevant documentation,
best practices, and potential issues. Focus on any libraries, APIs, or patterns used.
Code changes:
{diff}
Research topics to cover:
1. Documentation for any libraries or APIs being used/modified
2. Best practices for the patterns shown
3. Known issues or gotchas related to these changes
4. Security considerations if applicable"""
# Default model for OpenRouter if not specified
model_name = args.model
if not model_name and args.provider == "openrouter":
model_name = "google/gemini-2.0-flash-001"
# Check required API keys
if args.provider == "openrouter" and not os.environ.get(
"OPENROUTER_API_KEY"
):
print(json.dumps({"error": "OPENROUTER_API_KEY not set"}))
sys.exit(1)
if args.search_tool == "serper" and not os.environ.get("SERPER_API_KEY"):
print(json.dumps({"error": "SERPER_API_KEY not set"}))
sys.exit(1)
try:
from local_deep_research.api import quick_summary
from local_deep_research.api.settings_utils import (
create_settings_snapshot,
)
# Build settings overrides
overrides = {
"search.tool": args.search_tool,
"llm.provider": args.provider,
}
if model_name:
overrides["llm.model"] = model_name
# Add API keys from environment
if os.environ.get("OPENROUTER_API_KEY"):
overrides["llm.openrouter.api_key"] = os.environ[
"OPENROUTER_API_KEY"
]
if os.environ.get("SERPER_API_KEY"):
overrides["search.engine.web.serper.api_key"] = os.environ[
"SERPER_API_KEY"
]
settings = create_settings_snapshot(overrides=overrides)
# Build kwargs
kwargs = {
"query": query,
"provider": args.provider,
"settings_snapshot": settings,
"programmatic_mode": True,
"iterations": args.iterations,
}
if model_name:
kwargs["model_name"] = model_name
result = quick_summary(**kwargs)
# Use formatted_findings if available (already properly formatted with sources)
# Fall back to summary if not
research_output = result.get("formatted_findings") or result.get(
"summary", str(result)
)
# Build output - make sure everything is JSON serializable
output = {
"mode": args.mode,
"research": research_output,
"sources": make_serializable(result.get("sources", [])),
"findings": make_serializable(result.get("findings", [])),
"iterations": result.get("iterations", args.iterations),
}
# Include query for static mode (for verification)
if args.mode == "static":
output["query"] = query
print(json.dumps(output))
except Exception as e:
print(json.dumps({"error": str(e)}))
sys.exit(1)
if __name__ == "__main__":
main()