local-deep-research/examples/api_usage/http/advanced/simple_http_example.py

#!/usr/bin/env python3
"""
Simple HTTP API Example for Local Deep Research v1.0+

This example shows how to use the LDR API with authentication.
Works completely out of the box with automatic user creation.

================================================================================
IMPORTANT - LOCALHOST ONLY
================================================================================
This example ONLY works when connecting via localhost:
  ✅ http://localhost:5000
  ✅ http://127.0.0.1:5000

It will NOT work via http://192.168.x.x:5000 or other non-localhost addresses.

WHY: Session cookies require HTTPS for non-localhost (security).

SOLUTIONS for non-localhost:
1. HTTPS with reverse proxy (production)
2. SSH tunnel: ssh -L 5000:localhost:5000 user@server
3. TESTING=1 env var (INSECURE - dev only!)

WARNING: TESTING=1 disables cookie security. Never use in production.
================================================================================
"""

import requests
import time
import sys
from bs4 import BeautifulSoup
from pathlib import Path

# Add the src directory to Python path for programmatic user creation
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent / "src"))

from local_deep_research.database.encrypted_db import DatabaseManager
from local_deep_research.database.models import User
from local_deep_research.database.auth_db import get_auth_db_session

# Configuration
API_URL = "http://localhost:5000"


def create_test_user():
    """Create a test user programmatically."""
    username = f"testuser_{int(time.time())}"
    password = "testpassword123"

    print(f"Creating test user: {username}")

    try:
        # Create user in auth database
        auth_db = get_auth_db_session()
        new_user = User(username=username)
        auth_db.add(new_user)
        auth_db.commit()
        auth_db.close()

        # Create encrypted database for user
        db_manager = DatabaseManager()
        db_manager.create_user_database(username, password)

        print(f"✅ User created successfully: {username}")
        return username, password

    except Exception as e:
        print(f"❌ Failed to create user: {e}")
        return None, None


def main():
    print("=== LDR HTTP API Example ===")
    print("🎯 This example works completely out of the box!\n")

    print("⚠️  IMPORTANT NOTES:")
    print("   • This script may take several minutes to complete")
    print("   • Research progress can be monitored in the server logs")
    print("   • Server logs are available at: /tmp/ldr_server.log")
    print(
        "   • Use 'tail -f /tmp/ldr_server.log' to monitor progress in real-time"
    )
    print("   • Results will be available at the URL shown when complete\n")

    # Check if server is running
    try:
        response = requests.get(f"{API_URL}/", timeout=5)
        if response.status_code != 200:
            print("❌ Server is not responding correctly")
            print("\n📋 HOW TO START THE SERVER:")
            print("   • Option 1: python -m local_deep_research.web.app")
            print(
                "   • Option 2: bash scripts/dev/restart_server.sh (recommended)"
            )
            print(
                "   • Note: restart_server.sh will kill existing server process"
            )
            sys.exit(1)
        print("✅ Server is running")
    except Exception:
        print(
            "❌ Cannot connect to server. Please make sure it's running on http://localhost:5000"
        )
        print("\n📋 HOW TO START THE SERVER:")
        print("   • Option 1: python -m local_deep_research.web.app")
        print("   • Option 2: bash scripts/dev/restart_server.sh (recommended)")
        print("   • Note: restart_server.sh will kill existing server process")
        sys.exit(1)

    # Create test user automatically
    username, password = create_test_user()
    if not username:
        print("❌ Failed to create test user")
        sys.exit(1)

    # Create a session to persist cookies
    session = requests.Session()
    print(f"\nTesting with user: {username}")

    # Step 1: Login
    print("\n1. Authenticating...")

    # Get login page and CSRF token
    login_page = session.get(f"{API_URL}/auth/login")
    soup = BeautifulSoup(login_page.text, "html.parser")
    csrf_input = soup.find("input", {"name": "csrf_token"})
    login_csrf = csrf_input.get("value")

    if not login_csrf:
        print("❌ Could not get CSRF token from login page")
        sys.exit(1)

    # Login with form data (not JSON)
    login_response = session.post(
        f"{API_URL}/auth/login",
        data={
            "username": username,
            "password": password,
            "csrf_token": login_csrf,
        },
        allow_redirects=False,
    )

    if login_response.status_code not in [200, 302]:
        print(f"❌ Login failed: {login_response.text}")
        print("\nPlease ensure:")
        print("- The server is running: python -m local_deep_research.web.app")
        sys.exit(1)

    print("✅ Login successful")

    # Step 2: Get CSRF token
    print("\n2. Getting CSRF token...")
    csrf_response = session.get(f"{API_URL}/auth/csrf-token")
    csrf_token = csrf_response.json()["csrf_token"]
    headers = {"X-CSRF-Token": csrf_token}
    print("✅ CSRF token obtained")

    # Initialize research_id to None
    research_id = None

    # Example 1: Quick Summary (using the start endpoint)
    print("\n=== Example 1: Quick Summary ===")
    print(
        "📝 This example demonstrates starting a research query and polling for results"
    )
    print("⏱️  This typically takes 1-3 minutes to complete\n")

    research_request = {
        "query": "What is machine learning?",
        "model": None,  # Will use default from settings
        "search_engines": ["wikipedia"],  # Fast for demo
        "iterations": 1,
        "questions_per_iteration": 2,
    }

    # Start research - CORRECT ENDPOINT
    print("🚀 Starting research...")
    start_response = session.post(
        f"{API_URL}/api/start_research", json=research_request, headers=headers
    )

    if start_response.status_code != 200:
        print(f"❌ Failed to start research: {start_response.text}")
        sys.exit(1)

    research_data = start_response.json()
    research_id = research_data["research_id"]
    print("✅ Research started successfully!")
    print(f"🆔 Research ID: {research_id}")
    print("📊 Monitor progress in server logs: tail -f /tmp/ldr_server.log")
    print(f"🌐 Results will be available at: {API_URL}/results/{research_id}\n")

    # Poll for results
    print("⏳ Waiting for research to complete...")
    print(
        "⚠️  NOTE: This will poll for up to 3 minutes to ensure research completes"
    )
    print(
        "   If it fails, the research may still be running - check the results URL\n"
    )

    poll_count = 0
    max_polls = 18  # Maximum 3 minutes (18 * 10 seconds)

    while poll_count < max_polls:
        status_response = session.get(
            f"{API_URL}/api/research/{research_id}/status"
        )

        if status_response.status_code == 200:
            status = status_response.json()
            current_status = status.get("status", "unknown")
            progress = status.get("progress", 0)

            poll_count += 1
            elapsed_time = poll_count * 10  # 10 seconds per poll
            print(
                f"  Check {poll_count} ({elapsed_time}s): Status = {current_status} (Progress: {progress}%)"
            )

            if current_status == "completed":
                print("🎉 Research completed successfully!")
                break
            elif current_status == "failed":
                print(
                    f"❌ Research failed: {status.get('error', 'Unknown error')}"
                )
                print(
                    "📋 Check server logs for details: tail -f /tmp/ldr_server.log"
                )
                sys.exit(1)
            elif current_status in ["queued", "in_progress"]:
                # Continue polling
                pass
            else:
                print(f"⚠️  Unexpected status: {current_status}")

        else:
            print(
                f"⚠️  Status check failed with code: {status_response.status_code}"
            )

        time.sleep(10)  # Wait 10 seconds between polls

    if poll_count >= max_polls:
        print("⏰ 3-minute timeout reached - research is still running")
        print("💡 This is normal for complex research queries!")
        print(f"📊 Check results later at: {API_URL}/results/{research_id}")
        print("📋 Monitor progress with: tail -f /tmp/ldr_server.log")
        print(
            "🔍 The script will still try to fetch results (may be incomplete)"
        )

    # Get results
    results_response = session.get(f"{API_URL}/api/report/{research_id}")

    if results_response.status_code == 200:
        results = results_response.json()
        print(f"\n📝 Summary: {results['summary'][:300]}...")
        print(f"📚 Sources: {len(results.get('sources', []))} found")
        print(f"🔍 Findings: {len(results.get('findings', []))} findings")

    # Example 2: Check Settings
    print("\n=== Example 2: Current Settings ===")
    settings_response = session.get(f"{API_URL}/settings/api")

    if settings_response.status_code == 200:
        settings = settings_response.json()["settings"]

        # Show some key settings
        llm_provider = settings.get("llm.provider", {}).get("value", "Not set")
        llm_model = settings.get("llm.model", {}).get("value", "Not set")

        print(f"LLM Provider: {llm_provider}")
        print(f"LLM Model: {llm_model}")

    # Example 3: Get Research History
    print("\n=== Example 3: Research History ===")
    history_response = session.get(f"{API_URL}/history/api")

    if history_response.status_code == 200:
        history = history_response.json()
        items = history.get("items", history.get("history", []))

        print(f"Found {len(items)} research items")
        for item in items[:3]:  # Show first 3
            print(
                f"- {item.get('query', 'Unknown query')} ({item.get('created_at', 'Unknown date')})"
            )

    # Example 4: Get and Display Research Results (with retry logic)
    print("\n=== Example 4: Research Results ===")
    if research_id:
        print(f"📄 Fetching research results for ID: {research_id}")
        print(
            "🔄 Will retry until results are available (up to 2 additional minutes)\n"
        )

        # Retry fetching results until available
        results_retries = 0
        max_results_retries = 12  # 2 minutes (12 * 10 seconds)

        while results_retries < max_results_retries:
            results_response = session.get(
                f"{API_URL}/api/report/{research_id}"
            )

            if results_response.status_code == 200:
                # Results are available, parse and display them
                results = results_response.json()

                content = results.get("content", "")
                sources = results.get("sources", [])
                findings = results.get("findings", [])

                print(
                    f"✅ Results retrieved successfully after {(results_retries + 1) * 10} seconds!"
                )
                print("\n📝 RESEARCH SUMMARY:")
                print("=" * 50)
                if content:
                    # Show first 500 characters of the summary
                    summary_preview = (
                        content[:500] + "..." if len(content) > 500 else content
                    )
                    print(summary_preview)
                else:
                    print("No summary content available")

                print(f"\n📚 SOURCES FOUND: {len(sources)}")
                for i, source in enumerate(
                    sources[:3], 1
                ):  # Show first 3 sources
                    title = source.get("title", "Unknown Title")
                    url = source.get("url", "No URL")
                    print(f"   {i}. {title}")
                    print(f"      {url}")

                if len(sources) > 3:
                    print(f"   ... and {len(sources) - 3} more sources")

                print(f"\n🔍 KEY FINDINGS: {len(findings)}")
                for i, finding in enumerate(
                    findings[:3], 1
                ):  # Show first 3 findings
                    finding_text = finding.get("text", "No finding text")
                    finding_preview = (
                        finding_text[:150] + "..."
                        if len(finding_text) > 150
                        else finding_text
                    )
                    print(f"   {i}. {finding_preview}")

                if len(findings) > 3:
                    print(f"   ... and {len(findings) - 3} more findings")

                print(
                    f"\n🌐 View full results at: {API_URL}/results/{research_id}"
                )
                print("=" * 50)
                print("🎉 Results displayed successfully!")
                break  # Exit retry loop - success!

            elif results_response.status_code == 404:
                results_retries += 1
                elapsed_time = results_retries * 10
                print(
                    f"  Retry {results_retries}/{max_results_retries} ({elapsed_time}s): Results not ready yet, waiting..."
                )
                time.sleep(10)  # Wait 10 seconds before retrying

            else:
                print(
                    f"❌ Failed to fetch results: {results_response.status_code}"
                )
                print(f"Response: {results_response.text[:200]}")
                break  # Exit retry loop - error

        # Handle case where max retries reached
        if results_retries >= max_results_retries:
            print(
                f"\n⏰ Maximum retry time reached ({max_results_retries * 10} seconds)"
            )
            print("💡 This is normal for complex research queries!")
            print(f"📊 Check results later at: {API_URL}/results/{research_id}")
            print("📋 Monitor progress with: tail -f /tmp/ldr_server.log")
            print(
                "🔍 The research is still running - results will be available when complete"
            )
    else:
        print(
            "⚠️  No research ID available - research may not have started properly"
        )

    # Logout
    print("\n5. Logging out...")
    session.post(f"{API_URL}/auth/logout", headers=headers)
    print("✅ Logged out successfully")


if __name__ == "__main__":
    print("🎯 Simple LDR HTTP API Example - Works out of the box!")
    print("⚡ This script creates a user automatically and tests the API")
    print(
        "⏱️  Total runtime: Up to 3 minutes polling + 2 minutes results retry + research time"
    )
    print(
        "🔄 Automatically retries fetching results until available (up to 2 minutes)\n"
    )

    print("📋 REQUIREMENTS:")
    print("  • LDR server running")
    print("  • Beautiful Soup: pip install beautifulsoup4\n")

    print("🚀 START THE SERVER:")
    print("  • Option 1: python -m local_deep_research.web.app")
    print("  • Option 2: bash scripts/dev/restart_server.sh (recommended)")
    print("  • Note: restart_server.sh will kill existing server process\n")

    print("📊 MONITORING:")
    print("  • Server logs: tail -f /tmp/ldr_server.log")
    print("  • This script polls for up to 3 minutes")
    print("  • If research takes longer, script shows where to check results\n")

    print("⏰ TIMING INFO:")
    print("  • Script polls for 3 minutes to let research complete")
    print("  • Then retries fetching results for up to 2 additional minutes")
    print("  • Research typically completes in 2-10 minutes")
    print("  • Script displays results automatically when available")
    print(
        "  • If timeout reached, results URL provided for checking completion\n"
    )

    main()