mirror of
https://github.com/LearningCircuit/local-deep-research.git
synced 2026-06-16 03:51:07 +03:00
The script was setting the API key at 'search.serper.api_key' instead of the correct path 'search.engine.web.serper.api_key'. This caused the search engine factory to fail finding the key, falling back to other engines instead of using Google search via Serper.
229 lines
7.2 KiB
Python
Executable File
229 lines
7.2 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
LDR Diff Research Script
|
|
|
|
Reads a code diff from stdin and uses Local Deep Research to find
|
|
relevant documentation, best practices, and known issues.
|
|
|
|
Usage:
|
|
# Diff mode (default) - research a code diff
|
|
cat diff.txt | python scripts/ldr-diff-research.py
|
|
git diff HEAD~1 | python scripts/ldr-diff-research.py
|
|
|
|
# Static mode - run a fixed query for regression testing
|
|
python scripts/ldr-diff-research.py --mode static
|
|
|
|
# With CLI arguments (easier local testing)
|
|
python scripts/ldr-diff-research.py --provider openrouter --model gpt-4o < diff.txt
|
|
|
|
Output: JSON with research results, sources, and findings.
|
|
|
|
Environment variables (can be overridden by CLI args):
|
|
OPENROUTER_API_KEY - API key for OpenRouter
|
|
SERPER_API_KEY - API key for Serper.dev search
|
|
LDR_PROVIDER - LLM provider (default: openrouter)
|
|
LDR_SEARCH_TOOL - Search tool (default: serper)
|
|
LDR_MODEL - Model name (default: google/gemini-2.0-flash-001 for openrouter)
|
|
LDR_ITERATIONS - Research iterations (default: 1)
|
|
MAX_DIFF_SIZE - Max diff size in bytes (default: 8000)
|
|
|
|
Note: This uses the programmatic API and does NOT require a running LDR server.
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import sys
|
|
|
|
# Default static query for regression testing
|
|
DEFAULT_STATIC_QUERY = "What is Local Deep Research and how does it work?"
|
|
|
|
|
|
def make_serializable(obj):
|
|
"""Convert objects to JSON-serializable format."""
|
|
if obj is None:
|
|
return None
|
|
if isinstance(obj, (str, int, float, bool)):
|
|
return obj
|
|
if isinstance(obj, dict):
|
|
return {k: make_serializable(v) for k, v in obj.items()}
|
|
if isinstance(obj, (list, tuple)):
|
|
return [make_serializable(item) for item in obj]
|
|
# Handle LangChain Document objects
|
|
if hasattr(obj, "page_content") and hasattr(obj, "metadata"):
|
|
return {
|
|
"content": obj.page_content,
|
|
"metadata": make_serializable(obj.metadata),
|
|
}
|
|
# Handle other objects with __dict__
|
|
if hasattr(obj, "__dict__"):
|
|
return make_serializable(obj.__dict__)
|
|
# Fallback to string representation
|
|
return str(obj)
|
|
|
|
|
|
def parse_args():
|
|
parser = argparse.ArgumentParser(
|
|
description="Run LDR research on code diffs or static queries"
|
|
)
|
|
parser.add_argument(
|
|
"--mode",
|
|
choices=["diff", "static"],
|
|
default=os.environ.get("LDR_MODE", "diff"),
|
|
help="Research mode: 'diff' for PR diffs, 'static' for fixed query",
|
|
)
|
|
parser.add_argument(
|
|
"--provider",
|
|
default=os.environ.get("LDR_PROVIDER", "openrouter"),
|
|
help="LLM provider (default: openrouter)",
|
|
)
|
|
parser.add_argument(
|
|
"--search-tool",
|
|
default=os.environ.get("LDR_SEARCH_TOOL", "serper"),
|
|
help="Search tool (default: serper)",
|
|
)
|
|
parser.add_argument(
|
|
"--model",
|
|
default=os.environ.get("LDR_MODEL"),
|
|
help="Model name (default: provider's default)",
|
|
)
|
|
parser.add_argument(
|
|
"--iterations",
|
|
type=int,
|
|
default=int(os.environ.get("LDR_ITERATIONS", "1")),
|
|
help="Number of research iterations (default: 1)",
|
|
)
|
|
parser.add_argument(
|
|
"--max-diff-size",
|
|
type=int,
|
|
default=int(os.environ.get("MAX_DIFF_SIZE", "8000")),
|
|
help="Max diff size in bytes (default: 8000)",
|
|
)
|
|
parser.add_argument(
|
|
"--static-query",
|
|
default=os.environ.get("LDR_STATIC_QUERY", DEFAULT_STATIC_QUERY),
|
|
help=f"Query for static mode (default: '{DEFAULT_STATIC_QUERY}')",
|
|
)
|
|
return parser.parse_args()
|
|
|
|
|
|
def main():
|
|
args = parse_args()
|
|
|
|
# Determine query based on mode
|
|
if args.mode == "static":
|
|
query = args.static_query
|
|
diff = None
|
|
else:
|
|
# Read diff from stdin
|
|
diff = sys.stdin.read()
|
|
if not diff.strip():
|
|
print(
|
|
json.dumps(
|
|
{
|
|
"error": "No diff provided (use --mode static for fixed query)"
|
|
}
|
|
)
|
|
)
|
|
sys.exit(1)
|
|
|
|
# Truncate if too large
|
|
if len(diff) > args.max_diff_size:
|
|
diff = diff[: args.max_diff_size] + "\n... (truncated)"
|
|
|
|
query = f"""Based on these code changes, research relevant documentation,
|
|
best practices, and potential issues. Focus on any libraries, APIs, or patterns used.
|
|
|
|
Code changes:
|
|
{diff}
|
|
|
|
Research topics to cover:
|
|
1. Documentation for any libraries or APIs being used/modified
|
|
2. Best practices for the patterns shown
|
|
3. Known issues or gotchas related to these changes
|
|
4. Security considerations if applicable"""
|
|
|
|
# Default model for OpenRouter if not specified
|
|
model_name = args.model
|
|
if not model_name and args.provider == "openrouter":
|
|
model_name = "google/gemini-2.0-flash-001"
|
|
|
|
# Check required API keys
|
|
if args.provider == "openrouter" and not os.environ.get(
|
|
"OPENROUTER_API_KEY"
|
|
):
|
|
print(json.dumps({"error": "OPENROUTER_API_KEY not set"}))
|
|
sys.exit(1)
|
|
|
|
if args.search_tool == "serper" and not os.environ.get("SERPER_API_KEY"):
|
|
print(json.dumps({"error": "SERPER_API_KEY not set"}))
|
|
sys.exit(1)
|
|
|
|
try:
|
|
from local_deep_research.api import quick_summary
|
|
from local_deep_research.api.settings_utils import (
|
|
create_settings_snapshot,
|
|
)
|
|
|
|
# Build settings overrides
|
|
overrides = {
|
|
"search.tool": args.search_tool,
|
|
"llm.provider": args.provider,
|
|
}
|
|
if model_name:
|
|
overrides["llm.model"] = model_name
|
|
|
|
# Add API keys from environment
|
|
if os.environ.get("OPENROUTER_API_KEY"):
|
|
overrides["llm.openrouter.api_key"] = os.environ[
|
|
"OPENROUTER_API_KEY"
|
|
]
|
|
if os.environ.get("SERPER_API_KEY"):
|
|
overrides["search.engine.web.serper.api_key"] = os.environ[
|
|
"SERPER_API_KEY"
|
|
]
|
|
|
|
settings = create_settings_snapshot(overrides=overrides)
|
|
|
|
# Build kwargs
|
|
kwargs = {
|
|
"query": query,
|
|
"provider": args.provider,
|
|
"settings_snapshot": settings,
|
|
"programmatic_mode": True,
|
|
"iterations": args.iterations,
|
|
}
|
|
if model_name:
|
|
kwargs["model_name"] = model_name
|
|
|
|
result = quick_summary(**kwargs)
|
|
|
|
# Use formatted_findings if available (already properly formatted with sources)
|
|
# Fall back to summary if not
|
|
research_output = result.get("formatted_findings") or result.get(
|
|
"summary", str(result)
|
|
)
|
|
|
|
# Build output - make sure everything is JSON serializable
|
|
output = {
|
|
"mode": args.mode,
|
|
"research": research_output,
|
|
"sources": make_serializable(result.get("sources", [])),
|
|
"findings": make_serializable(result.get("findings", [])),
|
|
"iterations": result.get("iterations", args.iterations),
|
|
}
|
|
|
|
# Include query for static mode (for verification)
|
|
if args.mode == "static":
|
|
output["query"] = query
|
|
|
|
print(json.dumps(output))
|
|
|
|
except Exception as e:
|
|
print(json.dumps({"error": str(e)}))
|
|
sys.exit(1)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|