Files
local-deep-research/examples/api_usage/http/advanced/simple_http_example.py
LearningCircuit 3be8341f66 fix: enable localhost HTTP for development without TESTING flag
Implement dynamic cookie security that allows localhost HTTP connections
to work out of the box while maintaining security for production:

- Add WSGI middleware (SecureCookieMiddleware) for dynamic Secure flag
- Localhost HTTP (127.0.0.1, ::1): No Secure flag (local traffic is safe)
- Proxied requests (X-Forwarded-For): Always add Secure flag (production)
- Non-localhost HTTP: Add Secure flag (requires HTTPS by design)
- TESTING mode: Never add Secure flag (for CI/development)

Security: Prevents X-Forwarded-For spoofing by checking for header
presence rather than value - any proxy header triggers Secure flag.

Also includes:
- Update HTTP examples with clear "LOCALHOST ONLY" documentation
- Add helpful CSRF error message explaining the security model
- Add comprehensive cookie security tests (9 tests)
- Add cookie security tests to CI workflow
2025-12-07 13:59:32 +01:00

436 lines
16 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Simple HTTP API Example for Local Deep Research v1.0+
This example shows how to use the LDR API with authentication.
Works completely out of the box with automatic user creation.
================================================================================
IMPORTANT - LOCALHOST ONLY
================================================================================
This example ONLY works when connecting via localhost:
✅ http://localhost:5000
✅ http://127.0.0.1:5000
It will NOT work via http://192.168.x.x:5000 or other non-localhost addresses.
WHY: Session cookies require HTTPS for non-localhost (security).
SOLUTIONS for non-localhost:
1. HTTPS with reverse proxy (production)
2. SSH tunnel: ssh -L 5000:localhost:5000 user@server
3. TESTING=1 env var (INSECURE - dev only!)
WARNING: TESTING=1 disables cookie security. Never use in production.
================================================================================
"""
import requests
import time
import sys
from bs4 import BeautifulSoup
from pathlib import Path
# Add the src directory to Python path for programmatic user creation
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent / "src"))
from local_deep_research.database.encrypted_db import DatabaseManager
from local_deep_research.database.models import User
from local_deep_research.database.auth_db import get_auth_db_session
# Configuration
API_URL = "http://localhost:5000"
def create_test_user():
"""Create a test user programmatically."""
username = f"testuser_{int(time.time())}"
password = "testpassword123"
print(f"Creating test user: {username}")
try:
# Create user in auth database
auth_db = get_auth_db_session()
new_user = User(username=username)
auth_db.add(new_user)
auth_db.commit()
auth_db.close()
# Create encrypted database for user
db_manager = DatabaseManager()
db_manager.create_user_database(username, password)
print(f"✅ User created successfully: {username}")
return username, password
except Exception as e:
print(f"❌ Failed to create user: {e}")
return None, None
def main():
print("=== LDR HTTP API Example ===")
print("🎯 This example works completely out of the box!\n")
print("⚠️ IMPORTANT NOTES:")
print(" • This script may take several minutes to complete")
print(" • Research progress can be monitored in the server logs")
print(" • Server logs are available at: /tmp/ldr_server.log")
print(
" • Use 'tail -f /tmp/ldr_server.log' to monitor progress in real-time"
)
print(" • Results will be available at the URL shown when complete\n")
# Check if server is running
try:
response = requests.get(f"{API_URL}/", timeout=5)
if response.status_code != 200:
print("❌ Server is not responding correctly")
print("\n📋 HOW TO START THE SERVER:")
print(" • Option 1: python -m local_deep_research.web.app")
print(
" • Option 2: bash scripts/dev/restart_server.sh (recommended)"
)
print(
" • Note: restart_server.sh will kill existing server process"
)
sys.exit(1)
print("✅ Server is running")
except Exception:
print(
"❌ Cannot connect to server. Please make sure it's running on http://localhost:5000"
)
print("\n📋 HOW TO START THE SERVER:")
print(" • Option 1: python -m local_deep_research.web.app")
print(" • Option 2: bash scripts/dev/restart_server.sh (recommended)")
print(" • Note: restart_server.sh will kill existing server process")
sys.exit(1)
# Create test user automatically
username, password = create_test_user()
if not username:
print("❌ Failed to create test user")
sys.exit(1)
# Create a session to persist cookies
session = requests.Session()
print(f"\nTesting with user: {username}")
# Step 1: Login
print("\n1. Authenticating...")
# Get login page and CSRF token
login_page = session.get(f"{API_URL}/auth/login")
soup = BeautifulSoup(login_page.text, "html.parser")
csrf_input = soup.find("input", {"name": "csrf_token"})
login_csrf = csrf_input.get("value")
if not login_csrf:
print("❌ Could not get CSRF token from login page")
sys.exit(1)
# Login with form data (not JSON)
login_response = session.post(
f"{API_URL}/auth/login",
data={
"username": username,
"password": password,
"csrf_token": login_csrf,
},
allow_redirects=False,
)
if login_response.status_code not in [200, 302]:
print(f"❌ Login failed: {login_response.text}")
print("\nPlease ensure:")
print("- The server is running: python -m local_deep_research.web.app")
sys.exit(1)
print("✅ Login successful")
# Step 2: Get CSRF token
print("\n2. Getting CSRF token...")
csrf_response = session.get(f"{API_URL}/auth/csrf-token")
csrf_token = csrf_response.json()["csrf_token"]
headers = {"X-CSRF-Token": csrf_token}
print("✅ CSRF token obtained")
# Initialize research_id to None
research_id = None
# Example 1: Quick Summary (using the start endpoint)
print("\n=== Example 1: Quick Summary ===")
print(
"📝 This example demonstrates starting a research query and polling for results"
)
print("⏱️ This typically takes 1-3 minutes to complete\n")
research_request = {
"query": "What is machine learning?",
"model": None, # Will use default from settings
"search_engines": ["wikipedia"], # Fast for demo
"iterations": 1,
"questions_per_iteration": 2,
}
# Start research - CORRECT ENDPOINT
print("🚀 Starting research...")
start_response = session.post(
f"{API_URL}/api/start_research", json=research_request, headers=headers
)
if start_response.status_code != 200:
print(f"❌ Failed to start research: {start_response.text}")
sys.exit(1)
research_data = start_response.json()
research_id = research_data["research_id"]
print("✅ Research started successfully!")
print(f"🆔 Research ID: {research_id}")
print("📊 Monitor progress in server logs: tail -f /tmp/ldr_server.log")
print(f"🌐 Results will be available at: {API_URL}/results/{research_id}\n")
# Poll for results
print("⏳ Waiting for research to complete...")
print(
"⚠️ NOTE: This will poll for up to 3 minutes to ensure research completes"
)
print(
" If it fails, the research may still be running - check the results URL\n"
)
poll_count = 0
max_polls = 18 # Maximum 3 minutes (18 * 10 seconds)
while poll_count < max_polls:
status_response = session.get(
f"{API_URL}/api/research/{research_id}/status"
)
if status_response.status_code == 200:
status = status_response.json()
current_status = status.get("status", "unknown")
progress = status.get("progress", 0)
poll_count += 1
elapsed_time = poll_count * 10 # 10 seconds per poll
print(
f" Check {poll_count} ({elapsed_time}s): Status = {current_status} (Progress: {progress}%)"
)
if current_status == "completed":
print("🎉 Research completed successfully!")
break
elif current_status == "failed":
print(
f"❌ Research failed: {status.get('error', 'Unknown error')}"
)
print(
"📋 Check server logs for details: tail -f /tmp/ldr_server.log"
)
sys.exit(1)
elif current_status in ["queued", "in_progress"]:
# Continue polling
pass
else:
print(f"⚠️ Unexpected status: {current_status}")
else:
print(
f"⚠️ Status check failed with code: {status_response.status_code}"
)
time.sleep(10) # Wait 10 seconds between polls
if poll_count >= max_polls:
print("⏰ 3-minute timeout reached - research is still running")
print("💡 This is normal for complex research queries!")
print(f"📊 Check results later at: {API_URL}/results/{research_id}")
print("📋 Monitor progress with: tail -f /tmp/ldr_server.log")
print(
"🔍 The script will still try to fetch results (may be incomplete)"
)
# Get results
results_response = session.get(f"{API_URL}/api/report/{research_id}")
if results_response.status_code == 200:
results = results_response.json()
print(f"\n📝 Summary: {results['summary'][:300]}...")
print(f"📚 Sources: {len(results.get('sources', []))} found")
print(f"🔍 Findings: {len(results.get('findings', []))} findings")
# Example 2: Check Settings
print("\n=== Example 2: Current Settings ===")
settings_response = session.get(f"{API_URL}/settings/api")
if settings_response.status_code == 200:
settings = settings_response.json()["settings"]
# Show some key settings
llm_provider = settings.get("llm.provider", {}).get("value", "Not set")
llm_model = settings.get("llm.model", {}).get("value", "Not set")
print(f"LLM Provider: {llm_provider}")
print(f"LLM Model: {llm_model}")
# Example 3: Get Research History
print("\n=== Example 3: Research History ===")
history_response = session.get(f"{API_URL}/history/api")
if history_response.status_code == 200:
history = history_response.json()
items = history.get("items", history.get("history", []))
print(f"Found {len(items)} research items")
for item in items[:3]: # Show first 3
print(
f"- {item.get('query', 'Unknown query')} ({item.get('created_at', 'Unknown date')})"
)
# Example 4: Get and Display Research Results (with retry logic)
print("\n=== Example 4: Research Results ===")
if research_id:
print(f"📄 Fetching research results for ID: {research_id}")
print(
"🔄 Will retry until results are available (up to 2 additional minutes)\n"
)
# Retry fetching results until available
results_retries = 0
max_results_retries = 12 # 2 minutes (12 * 10 seconds)
while results_retries < max_results_retries:
results_response = session.get(
f"{API_URL}/api/report/{research_id}"
)
if results_response.status_code == 200:
# Results are available, parse and display them
results = results_response.json()
content = results.get("content", "")
sources = results.get("sources", [])
findings = results.get("findings", [])
print(
f"✅ Results retrieved successfully after {(results_retries + 1) * 10} seconds!"
)
print("\n📝 RESEARCH SUMMARY:")
print("=" * 50)
if content:
# Show first 500 characters of the summary
summary_preview = (
content[:500] + "..." if len(content) > 500 else content
)
print(summary_preview)
else:
print("No summary content available")
print(f"\n📚 SOURCES FOUND: {len(sources)}")
for i, source in enumerate(
sources[:3], 1
): # Show first 3 sources
title = source.get("title", "Unknown Title")
url = source.get("url", "No URL")
print(f" {i}. {title}")
print(f" {url}")
if len(sources) > 3:
print(f" ... and {len(sources) - 3} more sources")
print(f"\n🔍 KEY FINDINGS: {len(findings)}")
for i, finding in enumerate(
findings[:3], 1
): # Show first 3 findings
finding_text = finding.get("text", "No finding text")
finding_preview = (
finding_text[:150] + "..."
if len(finding_text) > 150
else finding_text
)
print(f" {i}. {finding_preview}")
if len(findings) > 3:
print(f" ... and {len(findings) - 3} more findings")
print(
f"\n🌐 View full results at: {API_URL}/results/{research_id}"
)
print("=" * 50)
print("🎉 Results displayed successfully!")
break # Exit retry loop - success!
elif results_response.status_code == 404:
results_retries += 1
elapsed_time = results_retries * 10
print(
f" Retry {results_retries}/{max_results_retries} ({elapsed_time}s): Results not ready yet, waiting..."
)
time.sleep(10) # Wait 10 seconds before retrying
else:
print(
f"❌ Failed to fetch results: {results_response.status_code}"
)
print(f"Response: {results_response.text[:200]}")
break # Exit retry loop - error
# Handle case where max retries reached
if results_retries >= max_results_retries:
print(
f"\n⏰ Maximum retry time reached ({max_results_retries * 10} seconds)"
)
print("💡 This is normal for complex research queries!")
print(f"📊 Check results later at: {API_URL}/results/{research_id}")
print("📋 Monitor progress with: tail -f /tmp/ldr_server.log")
print(
"🔍 The research is still running - results will be available when complete"
)
else:
print(
"⚠️ No research ID available - research may not have started properly"
)
# Logout
print("\n5. Logging out...")
session.post(f"{API_URL}/auth/logout", headers=headers)
print("✅ Logged out successfully")
if __name__ == "__main__":
print("🎯 Simple LDR HTTP API Example - Works out of the box!")
print("⚡ This script creates a user automatically and tests the API")
print(
"⏱️ Total runtime: Up to 3 minutes polling + 2 minutes results retry + research time"
)
print(
"🔄 Automatically retries fetching results until available (up to 2 minutes)\n"
)
print("📋 REQUIREMENTS:")
print(" • LDR server running")
print(" • Beautiful Soup: pip install beautifulsoup4\n")
print("🚀 START THE SERVER:")
print(" • Option 1: python -m local_deep_research.web.app")
print(" • Option 2: bash scripts/dev/restart_server.sh (recommended)")
print(" • Note: restart_server.sh will kill existing server process\n")
print("📊 MONITORING:")
print(" • Server logs: tail -f /tmp/ldr_server.log")
print(" • This script polls for up to 3 minutes")
print(" • If research takes longer, script shows where to check results\n")
print("⏰ TIMING INFO:")
print(" • Script polls for 3 minutes to let research complete")
print(" • Then retries fetching results for up to 2 additional minutes")
print(" • Research typically completes in 2-10 minutes")
print(" • Script displays results automatically when available")
print(
" • If timeout reached, results URL provided for checking completion\n"
)
main()