Files
local-deep-research/examples/api_usage/programmatic/advanced_features_example.py
LearningCircuit 3d917453c8 fix: resolve programmatic API bugs and add CI tests (#1085)
- Fix Ollama API key NoSettingsContextError by using empty string default
- Add ui_element fallback in thread_settings to handle custom overrides
- Pass settings_snapshot to file_write_verifier for programmatic mode
- Auto-detect ui_element type in settings_utils (checkbox/number/json)
- Add api.allow_file_output to programmatic examples
- Add programmatic API tests to api-tests.yml workflow

These fixes enable the programmatic API to work correctly without database
access, as intended for programmatic usage scenarios.
2025-11-19 12:28:37 -05:00

613 lines
17 KiB
Python

#!/usr/bin/env python3
"""
Advanced Features Example for Local Deep Research
This example demonstrates advanced programmatic features including:
1. generate_report() - Create comprehensive markdown reports
2. Export formats - Save reports in different formats
3. Result analysis - Extract and analyze research findings
4. Keyword extraction - Identify key topics and concepts
"""
import json
from typing import Dict, List, Any
from local_deep_research.api import (
generate_report,
detailed_research,
quick_summary,
)
from local_deep_research.api.settings_utils import create_settings_snapshot
def demonstrate_report_generation():
    """
    Produce a research report with generate_report() and preview it.

    The function builds a programmatic-mode settings snapshot (Wikipedia
    search, temperature 0.5, file output allowed), asks generate_report()
    to research a fixed healthcare/ML topic and write it to a markdown
    file, then prints the report length, the saved path and the first 500
    characters of the content.

    Returns:
        The value returned by generate_report(); this code reads its
        "content" entry and, if present, its "file_path" entry.
    """
    topic = "Applications of Machine Learning in Healthcare"
    target_file = "ml_healthcare_report.md"
    banner = "=" * 70

    print(banner)
    print("GENERATE COMPREHENSIVE REPORT")
    print(banner)
    print("""
This demonstrates the generate_report() function which:
- Creates a structured markdown report
- Performs multiple searches per section
- Organizes findings into coherent sections
- Includes citations and references
""")

    # Settings snapshot for programmatic mode
    snapshot_overrides = {
        "programmatic_mode": True,
        "search.tool": "wikipedia",
        "llm.temperature": 0.5,  # Lower for more focused output
        "api.allow_file_output": True,  # Allow generate_report to save files
    }
    snapshot = create_settings_snapshot(overrides=snapshot_overrides)

    print(f"Generating report on '{topic}'...")
    report = generate_report(
        query=topic,
        output_file=target_file,
        searches_per_section=2,  # Multiple searches per section for depth
        settings_snapshot=snapshot,
        iterations=2,
        questions_per_iteration=3,
    )

    print("\n✓ Report generated successfully!")
    body = report["content"]
    print(f" - Report length: {len(body)} characters")
    saved_to = report.get("file_path", target_file)
    print(f" - File saved to: {saved_to}")

    # Short preview of the generated text
    print("\nReport preview (first 500 chars):")
    print("-" * 40)
    print(body[:500] + "...")
    return report
def demonstrate_export_formats():
    """
    Run one detailed research query and save the result three ways.

    Files written to the current working directory:
    - research_results.json: the raw result via json.dump (non-JSON
      values are stringified through default=str)
    - research_results.md: the output of format_as_markdown()
    - research_citations.txt: one numbered line per entry returned by
      extract_citations()

    Returns:
        The result object returned by detailed_research().
    """
    print("\n" + "=" * 70)
    print("EXPORT FORMATS")
    print("=" * 70)
    print("""
Exporting research in different formats:
- Markdown: Human-readable reports
- JSON: Structured data for processing
- Custom: Template-based formatting
""")

    snapshot = create_settings_snapshot(
        overrides={"programmatic_mode": True, "search.tool": "wikipedia"}
    )

    # Fetch the research data that every export below is based on
    research = detailed_research(
        query="Renewable energy technologies",
        settings_snapshot=snapshot,
        iterations=1,
        questions_per_iteration=2,
    )

    # --- JSON ---
    json_path = "research_results.json"
    with open(json_path, "w", encoding="utf-8") as json_out:
        json.dump(research, json_out, indent=2, default=str)
    print(f"\n✓ JSON export saved to: {json_path}")
    print(f" - Contains: {len(research.get('findings', []))} findings")
    print(f" - Sources: {len(research.get('sources', []))} sources")

    # --- Markdown ---
    markdown_text = format_as_markdown(research)
    markdown_path = "research_results.md"
    with open(markdown_path, "w", encoding="utf-8") as md_out:
        md_out.write(markdown_text)
    print(f"\n✓ Markdown export saved to: {markdown_path}")
    print(f" - Length: {len(markdown_text)} characters")

    # --- Custom format: numbered citation list ---
    references = extract_citations(research)
    citations_path = "research_citations.txt"
    with open(citations_path, "w", encoding="utf-8") as cite_out:
        cite_out.writelines(
            f"[{number}] {reference}\n"
            for number, reference in enumerate(references, 1)
        )
    print(f"\n✓ Citations export saved to: {citations_path}")
    print(f" - Total citations: {len(references)}")

    return research
def demonstrate_result_analysis():
    """
    Run a detailed research query and print statistics derived from it.

    The result of detailed_research() is passed to analyze_findings() for
    counts (findings, sources, questions, iterations, per-category and
    per-source-type tallies) and to extract_themes() for a theme list, of
    which at most the first five are printed.

    Returns:
        The dictionary produced by analyze_findings().
    """
    print("\n" + "=" * 70)
    print("RESULT ANALYSIS")
    print("=" * 70)
    print("""
Analyzing research results to extract:
- Key findings and insights
- Common themes and patterns
- Source statistics
- Quality metrics
""")

    snapshot = create_settings_snapshot(
        overrides={"programmatic_mode": True, "search.tool": "wikipedia"}
    )

    # Gather the data to analyze
    research = detailed_research(
        query="Impact of artificial intelligence on employment",
        settings_snapshot=snapshot,
        search_strategy="source-based",
        iterations=2,
        questions_per_iteration=3,
    )

    stats = analyze_findings(research)

    # Headline numbers: (printed label, key in the stats dictionary)
    print("\n📊 Research Analysis:")
    headline_rows = (
        ("Total findings", "total_findings"),
        ("Unique sources", "unique_sources"),
        ("Questions explored", "total_questions"),
        ("Iterations completed", "iterations"),
    )
    for label, key in headline_rows:
        print(f" - {label}: {stats[key]}")

    print("\n🔍 Finding Categories:")
    for name, tally in stats["categories"].items():
        print(f" - {name}: {tally} findings")

    print("\n📈 Source Distribution:")
    for kind, tally in stats["source_types"].items():
        print(f" - {kind}: {tally} sources")

    # Themes are shown as a ranked list capped at five entries
    top_themes = extract_themes(research)[:5]
    print("\n🎯 Key Themes Identified:")
    for rank, theme in enumerate(top_themes, start=1):
        print(f" {rank}. {theme}")

    return stats
def demonstrate_keyword_extraction():
    """
    Run a quick summary query and print terms extracted from it.

    Three helpers are applied to the quick_summary() result:
    - extract_keywords(): (word, frequency) pairs, first 10 printed
    - extract_concepts(): concept phrases, first 5 printed
    - extract_technical_terms(): technical words, first 8 printed

    Returns:
        A (keywords, concepts) tuple holding the full, untruncated lists.
    """
    print("\n" + "=" * 70)
    print("KEYWORD & CONCEPT EXTRACTION")
    print("=" * 70)
    print("""
Extracting keywords and concepts:
- Important terms and phrases
- Technical concepts
- Named entities
- Trend indicators
""")

    snapshot = create_settings_snapshot(
        overrides={"programmatic_mode": True, "search.tool": "wikipedia"}
    )

    # A quick summary is enough input for keyword extraction
    research = quick_summary(
        query="Quantum computing breakthroughs 2024",
        settings_snapshot=snapshot,
        iterations=1,
        questions_per_iteration=3,
    )

    keywords = extract_keywords(research)
    print("\n🔑 Top Keywords:")
    for word, occurrences in keywords[:10]:
        print(f" - {word}: {occurrences} occurrences")

    concepts = extract_concepts(research)
    print("\n💡 Key Concepts:")
    for position, concept in enumerate(concepts[:5], start=1):
        print(f" {position}. {concept}")

    tech_words = extract_technical_terms(research)
    print("\n🔬 Technical Terms:")
    for tech_word in tech_words[:8]:
        print(f" - {tech_word}")

    return keywords, concepts
def format_as_markdown(result: Dict[str, Any]) -> str:
    """Convert research results to markdown format.

    Every field is optional so that results lacking some keys can still
    be rendered instead of raising.

    Args:
        result: Research result dictionary. Keys read: "query",
            "research_id", "summary", "findings", "sources", "metadata".

    Returns:
        A markdown document with title, research ID, summary, numbered
        findings, numbered sources and a metadata bullet list. Missing
        "query"/"research_id" render as "N/A"; a missing, None or empty
        summary renders as "No summary available"; missing or None
        collections render as empty sections.
    """
    summary = result.get("summary")
    if not summary:
        summary = "No summary available"

    # "or" also covers keys that are present but explicitly set to None.
    findings = result.get("findings") or []
    sources = result.get("sources") or []
    metadata = result.get("metadata") or {}

    # Collect fragments and join once rather than growing a string with +=.
    parts = [
        f"# Research Report: {result.get('query', 'N/A')}\n\n",
        f"**Research ID:** {result.get('research_id', 'N/A')}\n\n",
        "## Summary\n\n",
        f"{summary}\n\n",
        "## Key Findings\n\n",
    ]
    parts.extend(
        f"{number}. {finding}\n\n" for number, finding in enumerate(findings, 1)
    )
    parts.append("## Sources\n\n")
    parts.extend(
        f"- [{number}] {source}\n" for number, source in enumerate(sources, 1)
    )
    parts.append("\n## Metadata\n\n")
    parts.extend(f"- **{key}:** {value}\n" for key, value in metadata.items())
    return "".join(parts)
def extract_citations(result: Dict[str, Any]) -> List[str]:
    """Extract citations from research results.

    Args:
        result: Research result dictionary; only "sources" is read.

    Returns:
        One string per source, in source order. For a dict source the
        first non-empty value among "url" and "title" is used, falling
        back to the dict's string form. Any other source is converted
        with str(). A missing or None "sources" yields an empty list.
    """
    citations = []
    for source in result.get("sources") or []:
        if isinstance(source, dict):
            # Fall through on empty/None values too, not only missing keys,
            # so a source with url=None still gets a usable citation.
            citation = source.get("url") or source.get("title") or source
        else:
            citation = source
        # Always emit a string so the return value honours List[str].
        citations.append(str(citation))
    return citations
def analyze_findings(result: Dict[str, Any]) -> Dict[str, Any]:
    """Analyze research findings for patterns and statistics.

    The categorization is a deliberately simple substring match on the
    lower-cased string form of each finding; the first matching category
    in the order positive, negative, technical wins, otherwise "neutral".

    Args:
        result: Research result dictionary. Keys read: "findings",
            "sources", "questions" (a mapping whose values are
            collections of questions) and "iterations". Missing or None
            values are treated as empty.

    Returns:
        Dictionary with:
        - "total_findings": number of findings
        - "unique_sources": number of distinct sources (dict sources are
          identified by a non-empty "url", everything else by its string
          form)
        - "total_questions": total size of all "questions" values
        - "iterations": result["iterations"], defaulting to 0
        - "categories": finding count per category
        - "source_types": count of all sources per detected type
    """
    from collections import Counter

    findings = result.get("findings") or []
    sources = result.get("sources") or []
    questions = result.get("questions") or {}

    # Ordered rules: earlier entries take precedence over later ones.
    category_rules = (
        ("positive", ("benefit", "improve", "enhance", "positive")),
        ("negative", ("risk", "challenge", "negative", "concern")),
        ("technical", ("algorithm", "system", "technology", "method")),
    )
    categories = {"positive": 0, "negative": 0, "neutral": 0, "technical": 0}
    for finding in findings:
        text = str(finding).lower()
        label = next(
            (
                name
                for name, words in category_rules
                if any(word in text for word in words)
            ),
            "neutral",
        )
        categories[label] += 1

    # Ordered (marker, label) pairs used to classify each source.
    source_rules = (
        ("wikipedia", "Wikipedia"),
        ("arxiv", "ArXiv"),
        ("github", "GitHub"),
    )
    source_types = Counter()
    seen_sources = set()
    for source in sources:
        raw = str(source)
        lowered = raw.lower()
        source_types[
            next(
                (label for marker, label in source_rules if marker in lowered),
                "Other",
            )
        ] += 1
        # Track identity separately so repeated sources are counted once
        # in "unique_sources".
        identity = (source.get("url") if isinstance(source, dict) else None) or raw
        seen_sources.add(str(identity))

    return {
        "total_findings": len(findings),
        "unique_sources": len(seen_sources),
        "total_questions": sum(len(qs) for qs in questions.values()),
        "iterations": result.get("iterations", 0),
        "categories": categories,
        "source_types": dict(source_types),
    }
def extract_themes(result: Dict[str, Any]) -> List[str]:
    """Extract main themes from research results.

    This is a simplified keyword lookup (in production, use NLP
    libraries): a theme is reported when any of its keywords occurs as a
    substring of the lower-cased summary and findings text.

    Args:
        result: Research result dictionary; "summary" and "findings" are
            read. Missing or None values are treated as empty.

    Returns:
        Title-cased theme names, in the fixed order of the pattern table.
    """
    theme_patterns = {
        "automation": ["automation", "automated", "automatic"],
        "job displacement": ["job loss", "unemployment", "displacement"],
        "skill requirements": ["skills", "training", "education"],
        "economic impact": ["economy", "economic", "gdp", "growth"],
        "innovation": ["innovation", "innovative", "breakthrough"],
    }

    summary = result.get("summary") or ""
    findings = result.get("findings") or []

    # Join with a separator between the summary and the findings as well,
    # so the end of the summary cannot fuse with the first finding.
    # Lower-case once instead of once per keyword.
    haystack = " ".join([str(summary), *(str(f) for f in findings)]).lower()

    return [
        theme.title()
        for theme, keywords in theme_patterns.items()
        if any(keyword in haystack for keyword in keywords)
    ]
def extract_keywords(result: Dict[str, Any]) -> List[tuple]:
    """Return the most frequent words of a research result.

    The summary and the string forms of all findings are merged,
    lower-cased and scanned for runs of at least four ASCII letters. A
    small fixed stop-word list is dropped before counting.

    Args:
        result: Research result dictionary; "summary" and "findings" are
            read.

    Returns:
        Up to 20 (word, count) tuples, most frequent first.
    """
    import re
    from collections import Counter

    # Frequent filler words that carry no topical meaning
    ignored = frozenset(
        (
            "that",
            "this",
            "with",
            "from",
            "have",
            "been",
            "were",
            "which",
            "their",
            "about",
        )
    )

    # Merge all text into one lower-case corpus
    findings_text = " ".join(map(str, result.get("findings", [])))
    corpus = "{} {}".format(result.get("summary", ""), findings_text).lower()

    # Simple tokenization (in production, use NLP libraries)
    candidates = re.findall(r"\b[a-z]{4,}\b", corpus)

    tally = Counter(token for token in candidates if token not in ignored)
    return tally.most_common(20)
def extract_concepts(result: Dict[str, Any]) -> List[str]:
    """Extract key concepts from research results.

    A fixed list of regular expressions is run against the lower-cased
    summary (in production, use NLP for entity extraction).

    Args:
        result: Research result dictionary; only "summary" is read. A
            missing or None summary is treated as empty.

    Returns:
        At most 10 distinct matched phrases, ordered by pattern and then
        by position in the summary, so the output is stable across runs.
    """
    import re

    concept_patterns = [
        r"quantum \w+",
        r"\w+ computing",
        r"\w+ algorithm",
        r"machine learning",
        r"artificial intelligence",
        r"\w+ technology",
    ]

    summary = str(result.get("summary") or "").lower()

    matches = []
    for pattern in concept_patterns:
        matches.extend(re.findall(pattern, summary))

    # dict.fromkeys removes duplicates while keeping first-seen order; a set
    # would make both the order and the 10 survivors vary between runs.
    return list(dict.fromkeys(matches))[:10]
def extract_technical_terms(result: Dict[str, Any]) -> List[str]:
    """Extract technical terms from research results.

    A word of the lower-cased summary counts as technical when it
    contains one of a fixed list of indicator substrings (for example
    "algorithms" contains "algorithm").

    Args:
        result: Research result dictionary; only "summary" is read. A
            missing or None summary is treated as empty.

    Returns:
        Distinct matching words, ordered by indicator and then by
        position in the summary, so the output is stable across runs.
    """
    import re

    # Common technical term indicators
    tech_indicators = [
        "algorithm",
        "system",
        "protocol",
        "framework",
        "architecture",
        "quantum",
        "neural",
        "network",
        "model",
        "optimization",
    ]

    summary = str(result.get("summary") or "").lower()

    found = []
    for indicator in tech_indicators:
        # Whole words that contain the indicator anywhere inside them
        found.extend(re.findall(rf"\b\w*{indicator}\w*\b", summary))

    # Order-preserving dedupe; set ordering would differ from run to run.
    return list(dict.fromkeys(found))
def demonstrate_batch_research(topics=None):
    """
    Show how to perform batch research on multiple topics.

    Each topic is researched with quick_summary() using a shared
    programmatic-mode settings snapshot. Per-topic finding counts and
    aggregate totals are printed, and all results are written to
    batch_research_results.json in the current working directory.

    Args:
        topics: Optional list of query strings to research. When omitted
            or None, three built-in renewable-energy topics are used, so
            existing zero-argument calls behave as before.

    Returns:
        Dictionary mapping each topic to its quick_summary() result.
    """
    if topics is None:
        topics = [
            "Solar energy innovations",
            "Wind power technology",
            "Hydrogen fuel cells",
        ]

    print("\n" + "=" * 70)
    print("BATCH RESEARCH PROCESSING")
    print("=" * 70)
    print("""
Processing multiple research queries:
- Efficient batch processing
- Comparative analysis
- Result aggregation
""")

    settings = create_settings_snapshot(
        overrides={
            "programmatic_mode": True,
            "search.tool": "wikipedia",
        }
    )

    batch_results = {}
    print("\n📚 Batch Research:")
    for topic in topics:
        print(f"\n Researching: {topic}")
        result = quick_summary(
            query=topic,
            settings_snapshot=settings,
            iterations=1,
            questions_per_iteration=2,
        )
        batch_results[topic] = result
        print(f" ✓ Found {len(result.get('findings', []))} findings")

    # Aggregate results
    print("\n📊 Aggregate Analysis:")
    total_findings = sum(
        len(r.get("findings", [])) for r in batch_results.values()
    )
    total_sources = sum(
        len(r.get("sources", [])) for r in batch_results.values()
    )
    # An empty topic list must not raise ZeroDivisionError.
    average_findings = total_findings / len(topics) if topics else 0.0
    print(f" - Total topics researched: {len(topics)}")
    print(f" - Total findings: {total_findings}")
    print(f" - Total sources: {total_sources}")
    print(f" - Average findings per topic: {average_findings:.1f}")

    # Save batch results; default=str stringifies values json cannot encode
    batch_file = "batch_research_results.json"
    with open(batch_file, "w", encoding="utf-8") as f:
        json.dump(batch_results, f, indent=2, default=str)
    print(f"\n✓ Batch results saved to: {batch_file}")
    return batch_results
def main():
    """Print an intro, run each demonstration in order, then a summary."""
    divider = "=" * 70

    print(divider)
    print("LOCAL DEEP RESEARCH - ADVANCED FEATURES DEMONSTRATION")
    print(divider)
    print("""
This example demonstrates advanced programmatic features:
1. Report generation with generate_report()
2. Multiple export formats
3. Result analysis and insights
4. Keyword and concept extraction
5. Batch research processing
""")

    # The demonstrations run sequentially; return values are not used here
    demonstrations = (
        demonstrate_report_generation,
        demonstrate_export_formats,
        demonstrate_result_analysis,
        demonstrate_keyword_extraction,
        demonstrate_batch_research,
    )
    for run_demo in demonstrations:
        run_demo()

    print("\n" + divider)
    print("DEMONSTRATION COMPLETE")
    print(divider)
    print("""
✓ All advanced features demonstrated successfully!
Key Takeaways:
1. generate_report() creates comprehensive markdown reports
2. Results can be exported in multiple formats (JSON, MD, custom)
3. Analysis tools extract insights, themes, and patterns
4. Keyword extraction identifies important terms and concepts
5. Batch processing enables systematic research
Files created:
- ml_healthcare_report.md - Full research report
- research_results.json - Structured research data
- research_results.md - Markdown formatted results
- research_citations.txt - Extracted citations
- batch_research_results.json - Batch research results
Next Steps:
- Customize report templates for your domain
- Integrate with data visualization tools
- Build automated research pipelines
- Create domain-specific analysis functions
""")
# Run the demonstrations only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()