Files
local-deep-research/examples/optimization/llm_multi_benchmark.py
LearningCircuit 0c6635ecc2 feat: Add pre-commit hook to enforce pathlib usage (issue #640) (#656)
* feat: Add pre-commit hook to enforce pathlib usage (issue #640)

- Created check-pathlib-usage.py pre-commit hook using AST parsing
- Detects os.path usage and suggests pathlib alternatives
- Fixed os.path.normpath usage in auth/routes.py to use PurePosixPath
- Added hook configuration to .pre-commit-config.yaml

The hook provides helpful suggestions for replacing os.path calls with
their pathlib equivalents for better cross-platform compatibility.

Co-Authored-By: djpetti <djpetti@users.noreply.github.com>

* feat: Add missing pathlib pre-commit hook script

Co-Authored-By: djpetti <djpetti@users.noreply.github.com>

* refactor: Migrate core src modules from os.path to pathlib

- Fixed web/app_factory.py, config/llm_config.py, metrics/token_counter.py
- Fixed utilities/es_utils.py, web/routes/benchmark_routes.py
- Fixed web/routes/settings_routes.py, web_search_engines/engines/search_engine_local.py
- Replaced os.path.join() with Path() / syntax
- Replaced os.path.exists() with Path().exists()
- Replaced os.path.basename() with Path().name
- Replaced os.path.dirname() with Path().parent

Part of the migration to modern pathlib API for better cross-platform
compatibility and cleaner code.

Co-Authored-By: djpetti <djpetti@users.noreply.github.com>

* refactor: Migrate from os.path to pathlib in src and tests (issue #640)

Replaced os.path usage with pathlib.Path throughout:
- src/local_deep_research/benchmarks: All os.path.join, exists, dirname, basename, abspath replaced
- tests directory: Complete migration of all test files
- Improved cross-platform compatibility and code readability
- Kept os.path.expandvars in env_settings.py (no pathlib equivalent)

Part of pre-commit hook enforcement for pathlib usage.
Remaining work: examples/ and scripts/ directories.

Co-Authored-By: djpetti

* fix: Complete migration from os.path to pathlib.Path (issue #640)

Completed manual migration of all os.path usage to pathlib.Path across:
- scripts/ directory (3 files)
- examples/ directory (25 files total)
  - examples/benchmarks/ (8 files)
  - examples/optimization/ (16 files)
  - examples/show_env_vars.py
- src/local_deep_research/settings/env_settings.py

Changes made:
- Replaced os.path.join() with Path() / syntax
- Replaced os.path.exists() with Path().exists()
- Replaced os.path.dirname() with Path().parent
- Replaced os.path.basename() with Path().name or Path().stem
- Replaced os.path.abspath() with Path().resolve()
- Replaced os.makedirs() with Path().mkdir(parents=True, exist_ok=True)
- Added pathlib import where needed

Note: Kept os.path.expandvars in env_settings.py as there is no pathlib
equivalent. Added comment explaining this limitation.

This completes the pathlib migration for issue #640.

Co-Authored-By: djpetti

* fix: Allow os.path.expandvars in pathlib pre-commit hook

Updated the check-pathlib-usage.py pre-commit hook to skip checking
os.path.expandvars since it has no pathlib equivalent.

Changes:
- Added exception for expandvars in both visit_Attribute and visit_Call methods
- Added comment in equivalents dictionary noting expandvars is allowed
- This allows env_settings.py to use os.path.expandvars without failing checks

This resolves the pre-commit CI failure while maintaining the pathlib
enforcement for all other os.path methods.

Co-Authored-By: djpetti

---------

Co-authored-by: djpetti
2025-08-17 22:52:35 +02:00

252 lines
7.5 KiB
Python

#!/usr/bin/env python
"""
Custom LLM multi-benchmark optimization example for Local Deep Research.
This script demonstrates how to run multi-benchmark optimization with custom LLM models.
Usage:
# Run from project root with PDM
cd /path/to/local-deep-research
pdm run python examples/optimization/llm_multi_benchmark.py --model "your-model" --provider "your-provider"
"""
import argparse
import os
import sys
from datetime import datetime, UTC
from pathlib import Path
from typing import Any, Dict, Optional
from loguru import logger
# Import benchmark optimization functions
from local_deep_research.benchmarks.optimization.api import optimize_parameters
def setup_llm_config(
model: Optional[str] = None,
provider: Optional[str] = None,
endpoint_url: Optional[str] = None,
api_key: Optional[str] = None,
temperature: float = 0.7,
) -> Dict[str, Any]:
"""
Set up LLM configuration for benchmarks and optimization.
Args:
model: LLM model name
provider: LLM provider
endpoint_url: Custom endpoint URL for OpenRouter or other services
api_key: API key for the service
temperature: LLM temperature
Returns:
Dictionary with LLM configuration
"""
config = {
"model_name": model,
"provider": provider,
"temperature": temperature,
}
if endpoint_url:
config["openai_endpoint_url"] = endpoint_url
os.environ["OPENAI_ENDPOINT_URL"] = endpoint_url
os.environ["LDR_LLM__OPENAI_ENDPOINT_URL"] = endpoint_url
if api_key:
# Set API key in environment
if provider == "openai" or provider == "openai_endpoint":
os.environ["OPENAI_API_KEY"] = api_key
os.environ["LDR_LLM__OPENAI_API_KEY"] = api_key
if provider == "openai_endpoint":
os.environ["OPENAI_ENDPOINT_API_KEY"] = api_key
os.environ["LDR_LLM__OPENAI_ENDPOINT_API_KEY"] = api_key
elif provider == "anthropic":
os.environ["ANTHROPIC_API_KEY"] = api_key
os.environ["LDR_LLM__ANTHROPIC_API_KEY"] = api_key
config["api_key"] = api_key
# Set model and provider in environment
if model:
os.environ["LDR_LLM__MODEL"] = model
if provider:
os.environ["LDR_LLM__PROVIDER"] = provider
return config
def main():
"""Run multi-benchmark optimization with custom LLM."""
parser = argparse.ArgumentParser(
description="Run multi-benchmark optimization with custom LLM"
)
# LLM configuration
parser.add_argument("--model", help="LLM model name")
parser.add_argument(
"--provider", help="LLM provider (openai, anthropic, openai_endpoint)"
)
parser.add_argument(
"--endpoint-url", help="Custom endpoint URL (for OpenRouter etc.)"
)
parser.add_argument("--api-key", help="API key for the LLM provider")
parser.add_argument(
"--temperature", type=float, default=0.7, help="Temperature for LLM"
)
# Optimization parameters
parser.add_argument(
"--mode",
choices=["balanced", "speed", "quality"],
default="balanced",
help="Optimization mode",
)
parser.add_argument(
"--trials", type=int, default=3, help="Number of trials (default: 3)"
)
parser.add_argument(
"--output-dir", help="Output directory (default: auto-generated)"
)
args = parser.parse_args()
# Create timestamp-based directory for results
timestamp = datetime.now(UTC).strftime("%Y%m%d_%H%M%S")
if args.output_dir:
output_dir = args.output_dir
else:
output_dir = str(
Path("examples")
/ "optimization"
/ "results"
/ f"llm_multi_benchmark_{timestamp}"
)
os.makedirs(output_dir, exist_ok=True)
print(f"Results will be saved to: {output_dir}")
# Set up LLM configuration
setup_llm_config(
model=args.model,
provider=args.provider,
endpoint_url=args.endpoint_url,
api_key=args.api_key,
temperature=args.temperature,
)
if args.model and args.provider:
print(f"Using LLM: {args.model} via {args.provider}")
else:
print("Using default LLM configuration from environment or database")
# Define a small parameter space for quick demonstration
param_space = {
"iterations": {
"type": "int",
"low": 1,
"high": 2,
"step": 1,
},
"questions_per_iteration": {
"type": "int",
"low": 1,
"high": 2,
"step": 1,
},
"search_strategy": {
"type": "categorical",
"choices": ["rapid", "source_based"], # Limited choices for speed
},
}
# Example query for running optimization
query = "Recent developments in fusion energy research"
# Define metrics weights based on mode
if args.mode == "speed":
metric_weights = {"speed": 0.8, "quality": 0.2}
elif args.mode == "quality":
metric_weights = {"quality": 0.9, "speed": 0.1}
else: # balanced
metric_weights = {"quality": 0.5, "speed": 0.5}
# Run optimization with multi-benchmark weights
print(
f"\n🔍 Running {args.mode}-focused optimization with SimpleQA and BrowseComp..."
)
try:
# Run optimization with combined benchmark weights
benchmark_weights = {
"simpleqa": 0.7,
"browsecomp": 0.3,
} # 70% SimpleQA, 30% BrowseComp
params, score = optimize_parameters(
query=query,
param_space=param_space,
output_dir=output_dir,
n_trials=args.trials,
model_name=args.model,
provider=args.provider,
openai_endpoint_url=args.endpoint_url,
temperature=args.temperature,
api_key=args.api_key,
benchmark_weights=benchmark_weights,
metric_weights=metric_weights,
search_tool="searxng",
)
print("\n" + "=" * 50)
print(f" OPTIMIZATION RESULTS - {args.mode.upper()} MODE ")
print("=" * 50)
print(f"SCORE: {score:.4f}")
print("Benchmark weights: SimpleQA 70%, BrowseComp 30%")
print(f"Metrics weights: {metric_weights}")
if args.model and args.provider:
print(f"LLM: {args.model} via {args.provider}")
print("\nBest Parameters:")
for param, value in params.items():
print(f" {param}: {value}")
print("=" * 50 + "\n")
# Save results to file
import json
with open(Path(output_dir) / "multi_benchmark_results.json", "w") as f:
json.dump(
{
"timestamp": timestamp,
"mode": args.mode,
"model": args.model,
"provider": args.provider,
"n_trials": args.trials,
"benchmark_weights": benchmark_weights,
"metric_weights": metric_weights,
"best_parameters": params,
"best_score": float(score),
},
f,
indent=2,
)
print(
f"Results saved to {Path(output_dir) / 'multi_benchmark_results.json'}"
)
except Exception:
logger.exception("Error running optimization")
import traceback
traceback.print_exc()
return 1
return 0
if __name__ == "__main__":
sys.exit(main())