mirror of
https://github.com/LearningCircuit/local-deep-research.git
synced 2026-06-16 12:02:34 +03:00
* refactor: Replace programmatic_mode setting with explicit argument (#627) Move programmatic_mode from settings_snapshot to an explicit constructor parameter for better API clarity and to prevent accidental modification. Changes: - Add programmatic_mode as explicit parameter to AdvancedSearchSystem - Update BaseSearchEngine and RateLimitTracker to accept programmatic_mode - Maintain backward compatibility in research_functions.py - Update all examples to use new parameter style - Update tests to reflect new API This change makes the programmatic mode behavior more explicit and prevents users from accidentally modifying this internal flag through settings. Thanks to @djpetti for the review and suggestion in PR #616. * feat: Complete programmatic_mode refactor as per review feedback - Made programmatic_mode an explicit parameter with default value True in research_functions - Added programmatic_mode parameter to create_search_engine and get_search functions - Updated MetaSearchEngine and LocalAllSearchEngine to accept and pass programmatic_mode - Removed extraction of programmatic_mode from settings_snapshot This completes the refactor suggested by @djpetti, fully eliminating programmatic_mode from settings and making it a first-class parameter. Thanks again to @djpetti for the thorough review\! * fix: Complete removal of programmatic_mode from settings_snapshot - Remove programmatic_mode from all example settings_snapshot calls - Add programmatic_mode=True as explicit parameter to all API function calls - Update README documentation to reflect the new API pattern - This addresses @djpetti's review comments to fully eliminate programmatic_mode from settings As noted by @djpetti, programmatic_mode should be a normal function argument rather than a setting, since it's an internal flag that users shouldn't modify. All examples now properly pass it as an explicit parameter to quick_summary(), detailed_research(), and other API functions.
177 lines
6.3 KiB
Python
177 lines
6.3 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Example of using SearXNG search engine with Local Deep Research.
|
|
|
|
This demonstrates how to use SearXNG for web search in programmatic mode.
|
|
Note: Requires a running SearXNG instance.
|
|
"""
|
|
|
|
import os
|
|
from langchain_ollama import ChatOllama
|
|
from local_deep_research.search_system import AdvancedSearchSystem
|
|
from local_deep_research.web_search_engines.engines.search_engine_searxng import (
|
|
SearXNGSearchEngine,
|
|
)
|
|
|
|
# Re-enable logging
|
|
from loguru import logger
|
|
import sys
|
|
|
|
logger.remove()
|
|
logger.add(sys.stderr, level="INFO", format="{time} {level} {message}")
|
|
logger.enable("local_deep_research")
|
|
|
|
|
|
def main():
|
|
"""Demonstrate using SearXNG with Local Deep Research."""
|
|
print("=== SearXNG Search Engine Example ===\n")
|
|
|
|
# Check if SearXNG URL is configured
|
|
searxng_url = os.getenv("SEARXNG_URL", "http://localhost:8080")
|
|
print(f"Using SearXNG instance at: {searxng_url}")
|
|
print(
|
|
"(Set SEARXNG_URL environment variable to use a different instance)\n"
|
|
)
|
|
|
|
# 1. Create LLM
|
|
print("1. Setting up Ollama LLM...")
|
|
llm = ChatOllama(model="gemma3:12b", temperature=0.3)
|
|
|
|
# 2. Configure settings
|
|
settings = {
|
|
"search.iterations": 2,
|
|
"search.questions_per_iteration": 3,
|
|
"search.strategy": "source-based",
|
|
"rate_limiting.enabled": False, # Disable rate limiting for demo
|
|
# SearXNG specific settings
|
|
"search_engines.searxng.base_url": searxng_url,
|
|
"search_engines.searxng.timeout": 30,
|
|
"search_engines.searxng.categories": ["general", "science"],
|
|
"search_engines.searxng.engines": ["google", "duckduckgo", "bing"],
|
|
"search_engines.searxng.language": "en",
|
|
"search_engines.searxng.time_range": "", # all time
|
|
"search_engines.searxng.safesearch": 0, # 0=off, 1=moderate, 2=strict
|
|
}
|
|
|
|
# 3. Create SearXNG search engine
|
|
print("2. Initializing SearXNG search engine...")
|
|
try:
|
|
search_engine = SearXNGSearchEngine(settings_snapshot=settings)
|
|
|
|
# Test the connection
|
|
print(" Testing SearXNG connection...")
|
|
test_results = search_engine.run("test query", research_context={})
|
|
if test_results:
|
|
print(
|
|
f" ✓ SearXNG is working! Got {len(test_results)} test results."
|
|
)
|
|
else:
|
|
print(" ⚠ SearXNG returned no results for test query.")
|
|
except Exception as e:
|
|
print(f"\n⚠ Error connecting to SearXNG: {e}")
|
|
print("\nPlease ensure SearXNG is running. You can start it with:")
|
|
print(" docker run -p 8888:8080 searxng/searxng")
|
|
print("\nFalling back to mock search engine for demonstration...")
|
|
|
|
# Fallback to mock search engine
|
|
class MockSearchEngine:
|
|
def __init__(self, settings_snapshot=None):
|
|
self.settings_snapshot = settings_snapshot or {}
|
|
|
|
def run(self, query, research_context=None):
|
|
return [
|
|
{
|
|
"title": f"Result for: {query}",
|
|
"link": "https://example.com/result",
|
|
"snippet": f"This is a mock result for the query: {query}. "
|
|
"In a real scenario, SearXNG would provide actual web search results.",
|
|
"full_content": "Full content would be fetched here...",
|
|
"rank": 1,
|
|
}
|
|
]
|
|
|
|
search_engine = MockSearchEngine(settings)
|
|
|
|
# 4. Create the search system
|
|
print("3. Creating AdvancedSearchSystem...")
|
|
# Pass programmatic_mode=True to disable database dependencies
|
|
search_system = AdvancedSearchSystem(
|
|
llm=llm,
|
|
search=search_engine,
|
|
settings_snapshot=settings,
|
|
programmatic_mode=True,
|
|
)
|
|
|
|
# 5. Run research queries
|
|
queries = [
|
|
"What are the latest developments in quantum computing in 2024?",
|
|
"How does CRISPR gene editing technology work?",
|
|
]
|
|
|
|
for query in queries:
|
|
print(f"\n{'=' * 60}")
|
|
print(f"Research Query: {query}")
|
|
print("=" * 60)
|
|
|
|
try:
|
|
result = search_system.analyze_topic(query)
|
|
|
|
# Display results
|
|
print("\n=== RESEARCH FINDINGS ===")
|
|
if result.get("formatted_findings"):
|
|
print(result["formatted_findings"])
|
|
else:
|
|
print(
|
|
"Summary:", result.get("current_knowledge", "No findings")
|
|
)
|
|
|
|
# Show metadata
|
|
print("\n=== METADATA ===")
|
|
print(f"• Iterations completed: {result.get('iterations', 0)}")
|
|
print(f"• Total findings: {len(result.get('findings', []))}")
|
|
|
|
# Show search sources from all_links_of_system or search_results in findings
|
|
all_links = result.get("all_links_of_system", [])
|
|
|
|
# Also check findings for search_results
|
|
for finding in result.get("findings", []):
|
|
if "search_results" in finding and finding["search_results"]:
|
|
all_links = finding["search_results"]
|
|
break
|
|
|
|
if all_links:
|
|
print(f"• Sources found: {len(all_links)}")
|
|
for i, link in enumerate(
|
|
all_links[:5], 1
|
|
): # Show first 5 sources
|
|
if isinstance(link, dict):
|
|
title = link.get("title", "No title")
|
|
url = link.get("link", "Unknown")
|
|
print(f" [{i}] {title}")
|
|
print(f" {url}")
|
|
|
|
# Show generated questions
|
|
if result.get("questions_by_iteration"):
|
|
print("\n=== RESEARCH QUESTIONS ===")
|
|
for iteration, questions in result[
|
|
"questions_by_iteration"
|
|
].items():
|
|
print(f"Iteration {iteration}:")
|
|
for q in questions[
|
|
:2
|
|
]: # Show first 2 questions per iteration
|
|
print(f" • {q}")
|
|
|
|
except Exception as e:
|
|
logger.exception("Error during research")
|
|
print(f"\n⚠ Error: {e}")
|
|
|
|
print("\n✓ SearXNG integration example completed!")
|
|
print(
|
|
"\nNote: For best results, ensure SearXNG is properly configured with multiple search engines."
|
|
)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|