Files
local-deep-research/examples/api_usage/programmatic/searxng_example.py
LearningCircuit 94291ea5ed refactor: Replace programmatic_mode setting with explicit argument (#627) (#633)
* refactor: Replace programmatic_mode setting with explicit argument (#627)

Move programmatic_mode from settings_snapshot to an explicit constructor
parameter for better API clarity and to prevent accidental modification.

Changes:
- Add programmatic_mode as explicit parameter to AdvancedSearchSystem
- Update BaseSearchEngine and RateLimitTracker to accept programmatic_mode
- Maintain backward compatibility in research_functions.py
- Update all examples to use new parameter style
- Update tests to reflect new API

This change makes the programmatic mode behavior more explicit and
prevents users from accidentally modifying this internal flag through settings.

Thanks to @djpetti for the review and suggestion in PR #616.

* feat: Complete programmatic_mode refactor as per review feedback

- Made programmatic_mode an explicit parameter with default value True in research_functions
- Added programmatic_mode parameter to create_search_engine and get_search functions
- Updated MetaSearchEngine and LocalAllSearchEngine to accept and pass programmatic_mode
- Removed extraction of programmatic_mode from settings_snapshot

This completes the refactor suggested by @djpetti, fully eliminating
programmatic_mode from settings and making it a first-class parameter.

Thanks again to @djpetti for the thorough review\!

* fix: Complete removal of programmatic_mode from settings_snapshot

- Remove programmatic_mode from all example settings_snapshot calls
- Add programmatic_mode=True as explicit parameter to all API function calls
- Update README documentation to reflect the new API pattern
- This addresses @djpetti's review comments to fully eliminate programmatic_mode from settings

As noted by @djpetti, programmatic_mode should be a normal function argument
rather than a setting, since it's an internal flag that users shouldn't modify.
All examples now properly pass it as an explicit parameter to quick_summary(),
detailed_research(), and other API functions.
2025-08-13 17:36:09 -04:00

177 lines
6.3 KiB
Python

#!/usr/bin/env python3
"""
Example of using SearXNG search engine with Local Deep Research.
This demonstrates how to use SearXNG for web search in programmatic mode.
Note: Requires a running SearXNG instance.
"""
import os
from langchain_ollama import ChatOllama
from local_deep_research.search_system import AdvancedSearchSystem
from local_deep_research.web_search_engines.engines.search_engine_searxng import (
SearXNGSearchEngine,
)
# Re-enable logging
from loguru import logger
import sys
logger.remove()
logger.add(sys.stderr, level="INFO", format="{time} {level} {message}")
logger.enable("local_deep_research")
def main():
"""Demonstrate using SearXNG with Local Deep Research."""
print("=== SearXNG Search Engine Example ===\n")
# Check if SearXNG URL is configured
searxng_url = os.getenv("SEARXNG_URL", "http://localhost:8080")
print(f"Using SearXNG instance at: {searxng_url}")
print(
"(Set SEARXNG_URL environment variable to use a different instance)\n"
)
# 1. Create LLM
print("1. Setting up Ollama LLM...")
llm = ChatOllama(model="gemma3:12b", temperature=0.3)
# 2. Configure settings
settings = {
"search.iterations": 2,
"search.questions_per_iteration": 3,
"search.strategy": "source-based",
"rate_limiting.enabled": False, # Disable rate limiting for demo
# SearXNG specific settings
"search_engines.searxng.base_url": searxng_url,
"search_engines.searxng.timeout": 30,
"search_engines.searxng.categories": ["general", "science"],
"search_engines.searxng.engines": ["google", "duckduckgo", "bing"],
"search_engines.searxng.language": "en",
"search_engines.searxng.time_range": "", # all time
"search_engines.searxng.safesearch": 0, # 0=off, 1=moderate, 2=strict
}
# 3. Create SearXNG search engine
print("2. Initializing SearXNG search engine...")
try:
search_engine = SearXNGSearchEngine(settings_snapshot=settings)
# Test the connection
print(" Testing SearXNG connection...")
test_results = search_engine.run("test query", research_context={})
if test_results:
print(
f" ✓ SearXNG is working! Got {len(test_results)} test results."
)
else:
print(" ⚠ SearXNG returned no results for test query.")
except Exception as e:
print(f"\n⚠ Error connecting to SearXNG: {e}")
print("\nPlease ensure SearXNG is running. You can start it with:")
print(" docker run -p 8888:8080 searxng/searxng")
print("\nFalling back to mock search engine for demonstration...")
# Fallback to mock search engine
class MockSearchEngine:
def __init__(self, settings_snapshot=None):
self.settings_snapshot = settings_snapshot or {}
def run(self, query, research_context=None):
return [
{
"title": f"Result for: {query}",
"link": "https://example.com/result",
"snippet": f"This is a mock result for the query: {query}. "
"In a real scenario, SearXNG would provide actual web search results.",
"full_content": "Full content would be fetched here...",
"rank": 1,
}
]
search_engine = MockSearchEngine(settings)
# 4. Create the search system
print("3. Creating AdvancedSearchSystem...")
# Pass programmatic_mode=True to disable database dependencies
search_system = AdvancedSearchSystem(
llm=llm,
search=search_engine,
settings_snapshot=settings,
programmatic_mode=True,
)
# 5. Run research queries
queries = [
"What are the latest developments in quantum computing in 2024?",
"How does CRISPR gene editing technology work?",
]
for query in queries:
print(f"\n{'=' * 60}")
print(f"Research Query: {query}")
print("=" * 60)
try:
result = search_system.analyze_topic(query)
# Display results
print("\n=== RESEARCH FINDINGS ===")
if result.get("formatted_findings"):
print(result["formatted_findings"])
else:
print(
"Summary:", result.get("current_knowledge", "No findings")
)
# Show metadata
print("\n=== METADATA ===")
print(f"• Iterations completed: {result.get('iterations', 0)}")
print(f"• Total findings: {len(result.get('findings', []))}")
# Show search sources from all_links_of_system or search_results in findings
all_links = result.get("all_links_of_system", [])
# Also check findings for search_results
for finding in result.get("findings", []):
if "search_results" in finding and finding["search_results"]:
all_links = finding["search_results"]
break
if all_links:
print(f"• Sources found: {len(all_links)}")
for i, link in enumerate(
all_links[:5], 1
): # Show first 5 sources
if isinstance(link, dict):
title = link.get("title", "No title")
url = link.get("link", "Unknown")
print(f" [{i}] {title}")
print(f" {url}")
# Show generated questions
if result.get("questions_by_iteration"):
print("\n=== RESEARCH QUESTIONS ===")
for iteration, questions in result[
"questions_by_iteration"
].items():
print(f"Iteration {iteration}:")
for q in questions[
:2
]: # Show first 2 questions per iteration
print(f"{q}")
except Exception as e:
logger.exception("Error during research")
print(f"\n⚠ Error: {e}")
print("\n✓ SearXNG integration example completed!")
print(
"\nNote: For best results, ensure SearXNG is properly configured with multiple search engines."
)
if __name__ == "__main__":
main()