mirror of
https://github.com/LearningCircuit/local-deep-research.git
synced 2026-06-16 03:51:07 +03:00
- Fix Ollama API key NoSettingsContextError by using an empty-string default.
- Add a ui_element fallback in thread_settings to handle custom overrides.
- Pass settings_snapshot to file_write_verifier for programmatic mode.
- Auto-detect the ui_element type in settings_utils (checkbox/number/json).
- Add api.allow_file_output to the programmatic examples.
- Add programmatic API tests to the api-tests.yml workflow.

These fixes enable the programmatic API to work correctly without database access, as intended for programmatic usage scenarios.
613 lines
17 KiB
Python
613 lines
17 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Advanced Features Example for Local Deep Research
|
|
|
|
This example demonstrates advanced programmatic features including:
|
|
1. generate_report() - Create comprehensive markdown reports
|
|
2. Export formats - Save reports in different formats
|
|
3. Result analysis - Extract and analyze research findings
|
|
4. Keyword extraction - Identify key topics and concepts
|
|
"""
|
|
|
|
import json
|
|
from typing import Dict, List, Any
|
|
|
|
from local_deep_research.api import (
|
|
generate_report,
|
|
detailed_research,
|
|
quick_summary,
|
|
)
|
|
from local_deep_research.api.settings_utils import create_settings_snapshot
|
|
|
|
|
|
def demonstrate_report_generation():
    """
    Produce a full markdown research report through generate_report().

    A programmatic-mode settings snapshot is built (Wikipedia search, lower
    LLM temperature, file output allowed), generate_report() is called with
    ``output_file="ml_healthcare_report.md"``, and the report length, the
    reported file path and a 500-character preview are printed.

    Returns:
        The value returned by generate_report().
    """
    rule = "=" * 70
    print(rule)
    print("GENERATE COMPREHENSIVE REPORT")
    print(rule)
    print("""
This demonstrates the generate_report() function which:
- Creates a structured markdown report
- Performs multiple searches per section
- Organizes findings into coherent sections
- Includes citations and references
""")

    # Settings snapshot for programmatic mode
    overrides = {
        "programmatic_mode": True,
        "search.tool": "wikipedia",
        "llm.temperature": 0.5,  # lower temperature for more focused output
        "api.allow_file_output": True,  # lets generate_report save files
    }
    snapshot = create_settings_snapshot(overrides=overrides)

    print(
        "Generating report on 'Applications of Machine Learning in Healthcare'..."
    )
    report = generate_report(
        query="Applications of Machine Learning in Healthcare",
        output_file="ml_healthcare_report.md",
        searches_per_section=2,  # several searches per section for depth
        settings_snapshot=snapshot,
        iterations=2,
        questions_per_iteration=3,
    )

    print("\n✓ Report generated successfully!")
    body = report["content"]
    print(f" - Report length: {len(body)} characters")
    saved_to = report.get("file_path", "ml_healthcare_report.md")
    print(f" - File saved to: {saved_to}")

    # Preview the beginning of the report
    print("\nReport preview (first 500 chars):")
    print("-" * 40)
    print(body[:500] + "...")

    return report
|
|
|
|
|
|
def demonstrate_export_formats():
    """
    Export a single research result as JSON, markdown and a citation list.

    Runs detailed_research() on "Renewable energy technologies" and writes:
    - research_results.json: the result dumped with ``default=str``
    - research_results.md: the output of format_as_markdown()
    - research_citations.txt: numbered lines from extract_citations()

    Returns:
        The value returned by detailed_research().
    """
    rule = "=" * 70
    print("\n" + rule)
    print("EXPORT FORMATS")
    print(rule)
    print("""
Exporting research in different formats:
- Markdown: Human-readable reports
- JSON: Structured data for processing
- Custom: Template-based formatting
""")

    snapshot = create_settings_snapshot(
        overrides={"programmatic_mode": True, "search.tool": "wikipedia"}
    )

    # Collect the research data that every export below is based on
    research = detailed_research(
        query="Renewable energy technologies",
        settings_snapshot=snapshot,
        iterations=1,
        questions_per_iteration=2,
    )

    # JSON export
    json_path = "research_results.json"
    with open(json_path, "w", encoding="utf-8") as handle:
        json.dump(research, handle, indent=2, default=str)
    finding_count = len(research.get("findings", []))
    source_count = len(research.get("sources", []))
    print(f"\n✓ JSON export saved to: {json_path}")
    print(f" - Contains: {finding_count} findings")
    print(f" - Sources: {source_count} sources")

    # Markdown export
    markdown_text = format_as_markdown(research)
    markdown_path = "research_results.md"
    with open(markdown_path, "w", encoding="utf-8") as handle:
        handle.write(markdown_text)
    print(f"\n✓ Markdown export saved to: {markdown_path}")
    print(f" - Length: {len(markdown_text)} characters")

    # Custom export: one numbered citation per line
    references = extract_citations(research)
    citation_path = "research_citations.txt"
    with open(citation_path, "w", encoding="utf-8") as handle:
        handle.writelines(
            f"[{number}] {reference}\n"
            for number, reference in enumerate(references, 1)
        )
    print(f"\n✓ Citations export saved to: {citation_path}")
    print(f" - Total citations: {len(references)}")

    return research
|
|
|
|
|
|
def demonstrate_result_analysis():
    """
    Run a research query and print statistics derived from its result.

    Calls detailed_research() with the "source-based" strategy, passes the
    result to analyze_findings() and extract_themes(), and prints the
    totals, the finding categories, the source distribution and up to five
    themes.

    Returns:
        The dictionary returned by analyze_findings().
    """
    rule = "=" * 70
    print("\n" + rule)
    print("RESULT ANALYSIS")
    print(rule)
    print("""
Analyzing research results to extract:
- Key findings and insights
- Common themes and patterns
- Source statistics
- Quality metrics
""")

    snapshot = create_settings_snapshot(
        overrides={"programmatic_mode": True, "search.tool": "wikipedia"}
    )

    # Gather the research data to analyze
    research = detailed_research(
        query="Impact of artificial intelligence on employment",
        settings_snapshot=snapshot,
        search_strategy="source-based",
        iterations=2,
        questions_per_iteration=3,
    )

    stats = analyze_findings(research)

    # Headline numbers, printed in a fixed order
    print("\n📊 Research Analysis:")
    headline_rows = (
        ("Total findings", "total_findings"),
        ("Unique sources", "unique_sources"),
        ("Questions explored", "total_questions"),
        ("Iterations completed", "iterations"),
    )
    for label, key in headline_rows:
        print(f" - {label}: {stats[key]}")

    print("\n🔍 Finding Categories:")
    for name, amount in stats["categories"].items():
        print(f" - {name}: {amount} findings")

    print("\n📈 Source Distribution:")
    for kind, amount in stats["source_types"].items():
        print(f" - {kind}: {amount} sources")

    # Only the first five themes are shown
    top_themes = extract_themes(research)[:5]
    print("\n🎯 Key Themes Identified:")
    for position, theme in enumerate(top_themes, 1):
        print(f" {position}. {theme}")

    return stats
|
|
|
|
|
|
def demonstrate_keyword_extraction():
    """
    Run a quick research query and print keywords, concepts and terms.

    Calls quick_summary() on "Quantum computing breakthroughs 2024", then
    prints the first ten entries from extract_keywords(), the first five
    from extract_concepts() and the first eight from
    extract_technical_terms().

    Returns:
        A ``(keywords, concepts)`` tuple holding the full, untruncated
        results of extract_keywords() and extract_concepts().
    """
    rule = "=" * 70
    print("\n" + rule)
    print("KEYWORD & CONCEPT EXTRACTION")
    print(rule)
    print("""
Extracting keywords and concepts:
- Important terms and phrases
- Technical concepts
- Named entities
- Trend indicators
""")

    snapshot = create_settings_snapshot(
        overrides={"programmatic_mode": True, "search.tool": "wikipedia"}
    )

    # A quick summary is enough input for the extraction helpers
    research = quick_summary(
        query="Quantum computing breakthroughs 2024",
        settings_snapshot=snapshot,
        iterations=1,
        questions_per_iteration=3,
    )

    keywords = extract_keywords(research)
    print("\n🔑 Top Keywords:")
    for word, count in keywords[:10]:
        print(f" - {word}: {count} occurrences")

    concepts = extract_concepts(research)
    print("\n💡 Key Concepts:")
    for position, concept in enumerate(concepts[:5], 1):
        print(f" {position}. {concept}")

    terms = extract_technical_terms(research)
    print("\n🔬 Technical Terms:")
    for term in terms[:8]:
        print(f" - {term}")

    return keywords, concepts
|
|
|
|
|
|
def format_as_markdown(result: Dict[str, Any]) -> str:
    """Convert research results to a markdown document.

    Args:
        result: Research result mapping. The keys read are ``query``,
            ``research_id``, ``summary``, ``findings``, ``sources`` and
            ``metadata``. Any of them may be missing or ``None``.

    Returns:
        Markdown text made of a title, the research ID, a summary section,
        numbered findings, numbered sources and a metadata list.
    """
    # A missing or None summary gets the placeholder; an empty string is
    # kept as-is so existing output for "" does not change.
    summary = result.get("summary")
    if summary is None:
        summary = "No summary available"

    # Collect fragments and join once instead of repeated string +=.
    parts = [
        f"# Research Report: {result.get('query', 'N/A')}\n\n",
        f"**Research ID:** {result.get('research_id', 'N/A')}\n\n",
        "## Summary\n\n",
        f"{summary}\n\n",
        "## Key Findings\n\n",
    ]

    # ``or []`` / ``or {}`` tolerate keys that are present but None.
    for number, finding in enumerate(result.get("findings") or [], 1):
        parts.append(f"{number}. {str(finding)}\n\n")

    parts.append("## Sources\n\n")
    for number, source in enumerate(result.get("sources") or [], 1):
        parts.append(f"- [{number}] {str(source)}\n")

    parts.append("\n## Metadata\n\n")
    for key, value in (result.get("metadata") or {}).items():
        parts.append(f"- **{key}:** {value}\n")

    return "".join(parts)
|
|
|
|
|
|
def extract_citations(result: Dict[str, Any]) -> List[str]:
    """Extract one citation string per source from research results.

    Args:
        result: Research result mapping; only ``sources`` is read. It may
            be missing or ``None``.

    Returns:
        A list of strings in source order. For a dict source this is its
        ``url`` if non-empty, else its ``title`` if non-empty, else the
        string form of the dict. Any other source is converted with
        ``str()``.
    """
    citations = []
    for source in result.get("sources") or []:
        if isinstance(source, dict):
            # ``or`` rather than nested .get defaults: a url that is present
            # but None/empty must still fall back to the title.
            citation = source.get("url") or source.get("title") or str(source)
        else:
            citation = source
        # Always store text so the declared List[str] return type holds.
        citations.append(str(citation))
    return citations
|
|
|
|
|
|
def analyze_findings(result: Dict[str, Any]) -> Dict[str, Any]:
    """Analyze research findings for simple patterns and statistics.

    Args:
        result: Research result mapping. The keys read are ``findings``,
            ``sources``, ``questions`` and ``iterations``; each may be
            missing or ``None``.

    Returns:
        A dict with these keys:
        - ``total_findings``: number of findings
        - ``unique_sources``: number of distinct sources, compared by
          their string form
        - ``total_questions``: for a dict, the summed length of its
          values; for any other value, its length
        - ``iterations``: ``result["iterations"]``, or 0 when absent
        - ``categories``: finding counts for positive / negative /
          neutral / technical
        - ``source_types``: counts per source type over every listed
          source, repeats included
    """
    findings = result.get("findings") or []
    sources = result.get("sources") or []
    questions = result.get("questions") or {}

    # Simplified keyword categorization. Order matters: the first group
    # with a matching substring wins, anything unmatched is "neutral".
    category_keywords = (
        ("positive", ("benefit", "improve", "enhance", "positive")),
        ("negative", ("risk", "challenge", "negative", "concern")),
        ("technical", ("algorithm", "system", "technology", "method")),
    )
    categories = {"positive": 0, "negative": 0, "neutral": 0, "technical": 0}
    for finding in findings:
        text = str(finding).lower()
        label = next(
            (
                name
                for name, words in category_keywords
                if any(word in text for word in words)
            ),
            "neutral",
        )
        categories[label] += 1

    # Classify each source by a substring of its string form.
    source_markers = (
        ("wikipedia", "Wikipedia"),
        ("arxiv", "ArXiv"),
        ("github", "GitHub"),
    )
    source_types: Dict[str, int] = {}
    for source in sources:
        text = str(source).lower()
        kind = next(
            (label for marker, label in source_markers if marker in text),
            "Other",
        )
        source_types[kind] = source_types.get(kind, 0) + 1

    # Sources may be unhashable dicts, so compare them by string form.
    unique_sources = len({str(source) for source in sources})

    if isinstance(questions, dict):
        total_questions = sum(len(group) for group in questions.values())
    else:
        total_questions = len(questions)

    return {
        "total_findings": len(findings),
        "unique_sources": unique_sources,
        "total_questions": total_questions,
        "iterations": result.get("iterations", 0),
        "categories": categories,
        "source_types": source_types,
    }
|
|
|
|
|
|
def extract_themes(result: Dict[str, Any]) -> List[str]:
    """Extract main themes from research results by keyword matching.

    Args:
        result: Research result mapping; ``summary`` and ``findings`` are
            read and may be missing or ``None``.

    Returns:
        Title-cased names of the themes for which at least one keyword
        occurs as a substring of the lower-cased summary and findings,
        in the order the themes are defined below.
    """
    summary = result.get("summary") or ""
    findings = result.get("findings") or []

    # Join with spaces throughout so the last word of the summary cannot
    # fuse with the first word of the first finding and match falsely.
    # Lower-case once here rather than once per keyword.
    full_text = " ".join([str(summary), *(str(f) for f in findings)]).lower()

    # Simple theme patterns (in production, use NLP libraries)
    theme_patterns = {
        "automation": ["automation", "automated", "automatic"],
        "job displacement": ["job loss", "unemployment", "displacement"],
        "skill requirements": ["skills", "training", "education"],
        "economic impact": ["economy", "economic", "gdp", "growth"],
        "innovation": ["innovation", "innovative", "breakthrough"],
    }

    return [
        theme.title()
        for theme, keywords in theme_patterns.items()
        if any(keyword in full_text for keyword in keywords)
    ]
|
|
|
|
|
|
def extract_keywords(result: Dict[str, Any]) -> List[tuple]:
    """Return the most frequent words in the summary and findings.

    The summary and the space-joined findings are combined and
    lower-cased. Runs of four or more ASCII letters are taken as words,
    a small stop-word set is removed, and the 20 most common words are
    returned as ``(word, count)`` tuples.
    """
    import re
    from collections import Counter

    # Common words that carry no topical meaning
    ignored = frozenset(
        (
            "that",
            "this",
            "with",
            "from",
            "have",
            "been",
            "were",
            "which",
            "their",
            "about",
        )
    )

    # Combine all text into one lower-cased corpus
    findings_text = " ".join(map(str, result.get("findings", [])))
    corpus = f"{result.get('summary', '')} {findings_text}".lower()

    # Simple word extraction (in production, use NLP libraries)
    tally = Counter(
        token
        for token in re.findall(r"\b[a-z]{4,}\b", corpus)
        if token not in ignored
    )
    return tally.most_common(20)
|
|
|
|
|
|
def extract_concepts(result: Dict[str, Any]) -> List[str]:
    """Extract key concept phrases from the research summary.

    Args:
        result: Research result mapping; only ``summary`` is read. It may
            be missing or ``None``.

    Returns:
        Up to ten distinct lower-cased phrases matched by the concept
        patterns, ordered by pattern and then by position in the summary.
        The order is the same on every run.
    """
    import re

    # Simple concept patterns (in production, use NLP for entity extraction)
    concept_patterns = (
        r"quantum \w+",
        r"\w+ computing",
        r"\w+ algorithm",
        r"machine learning",
        r"artificial intelligence",
        r"\w+ technology",
    )

    # Lower-case once rather than once per pattern.
    text = (result.get("summary") or "").lower()

    matches: List[str] = []
    for pattern in concept_patterns:
        matches.extend(re.findall(pattern, text))

    # dict.fromkeys removes duplicates but keeps first-seen order. A set
    # has no fixed order, so the [:10] slice would differ between runs.
    return list(dict.fromkeys(matches))[:10]
|
|
|
|
|
|
def extract_technical_terms(result: Dict[str, Any]) -> List[str]:
    """Extract words from the summary that contain a technical indicator.

    Args:
        result: Research result mapping; only ``summary`` is read. It may
            be missing or ``None``.

    Returns:
        Distinct lower-cased words that contain one of the indicator
        strings (for example "algorithms" for "algorithm"), ordered by
        indicator and then by position in the summary. The order is the
        same on every run.
    """
    import re

    # Common technical term indicators
    tech_indicators = (
        "algorithm",
        "system",
        "protocol",
        "framework",
        "architecture",
        "quantum",
        "neural",
        "network",
        "model",
        "optimization",
    )

    summary = (result.get("summary") or "").lower()

    technical_terms: List[str] = []
    for indicator in tech_indicators:
        # Whole words that contain the indicator as a substring.
        pattern = rf"\b\w*{re.escape(indicator)}\w*\b"
        technical_terms.extend(re.findall(pattern, summary))

    # Remove duplicates while keeping first-seen order; a set would return
    # the terms in an order that changes between runs.
    return list(dict.fromkeys(technical_terms))
|
|
|
|
|
|
def demonstrate_batch_research():
    """
    Run quick_summary() over several topics and aggregate the results.

    Each topic is researched in turn with the same settings snapshot.
    Totals for findings and sources are printed, and the per-topic results
    are written to ``batch_research_results.json`` with ``default=str``.

    Returns:
        A dict mapping each topic string to its quick_summary() result.
    """
    rule = "=" * 70
    print("\n" + rule)
    print("BATCH RESEARCH PROCESSING")
    print(rule)
    print("""
Processing multiple research queries:
- Efficient batch processing
- Comparative analysis
- Result aggregation
""")

    snapshot = create_settings_snapshot(
        overrides={"programmatic_mode": True, "search.tool": "wikipedia"}
    )

    # Topics for batch research
    topics = [
        "Solar energy innovations",
        "Wind power technology",
        "Hydrogen fuel cells",
    ]

    batch_results = {}

    print("\n📚 Batch Research:")
    for topic in topics:
        print(f"\n Researching: {topic}")
        outcome = quick_summary(
            query=topic,
            settings_snapshot=snapshot,
            iterations=1,
            questions_per_iteration=2,
        )
        batch_results[topic] = outcome
        print(f" ✓ Found {len(outcome.get('findings', []))} findings")

    # Aggregate counts across all topics
    print("\n📊 Aggregate Analysis:")
    total_findings = 0
    total_sources = 0
    for outcome in batch_results.values():
        total_findings += len(outcome.get("findings", []))
        total_sources += len(outcome.get("sources", []))

    topic_count = len(topics)
    print(f" - Total topics researched: {topic_count}")
    print(f" - Total findings: {total_findings}")
    print(f" - Total sources: {total_sources}")
    print(f" - Average findings per topic: {total_findings / topic_count:.1f}")

    # Persist the per-topic results
    batch_file = "batch_research_results.json"
    with open(batch_file, "w", encoding="utf-8") as handle:
        json.dump(batch_results, handle, indent=2, default=str)
    print(f"\n✓ Batch results saved to: {batch_file}")

    return batch_results
|
|
|
|
|
|
def main():
    """Run every advanced-feature demonstration in order, then print a recap."""
    rule = "=" * 70
    print(rule)
    print("LOCAL DEEP RESEARCH - ADVANCED FEATURES DEMONSTRATION")
    print(rule)
    print("""
This example demonstrates advanced programmatic features:
1. Report generation with generate_report()
2. Multiple export formats
3. Result analysis and insights
4. Keyword and concept extraction
5. Batch research processing
""")

    # Demonstrations run in this fixed order; return values are not used here
    demonstrations = (
        demonstrate_report_generation,
        demonstrate_export_formats,
        demonstrate_result_analysis,
        demonstrate_keyword_extraction,
        demonstrate_batch_research,
    )
    for run_demo in demonstrations:
        run_demo()

    print("\n" + rule)
    print("DEMONSTRATION COMPLETE")
    print(rule)
    print("""
✓ All advanced features demonstrated successfully!

Key Takeaways:
1. generate_report() creates comprehensive markdown reports
2. Results can be exported in multiple formats (JSON, MD, custom)
3. Analysis tools extract insights, themes, and patterns
4. Keyword extraction identifies important terms and concepts
5. Batch processing enables systematic research

Files created:
- ml_healthcare_report.md - Full research report
- research_results.json - Structured research data
- research_results.md - Markdown formatted results
- research_citations.txt - Extracted citations
- batch_research_results.json - Batch research results

Next Steps:
- Customize report templates for your domain
- Integrate with data visualization tools
- Build automated research pipelines
- Create domain-specific analysis functions
""")
|
|
|
|
|
|
# Run the demonstrations only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|