Files
local-deep-research/examples/optimization/gemini_optimization.py
LearningCircuit 0c6635ecc2 feat: Add pre-commit hook to enforce pathlib usage (issue #640) (#656)
* feat: Add pre-commit hook to enforce pathlib usage (issue #640)

- Created check-pathlib-usage.py pre-commit hook using AST parsing
- Detects os.path usage and suggests pathlib alternatives
- Fixed os.path.normpath usage in auth/routes.py to use PurePosixPath
- Added hook configuration to .pre-commit-config.yaml

The hook provides helpful suggestions for replacing os.path calls with
their pathlib equivalents for better cross-platform compatibility.

Co-Authored-By: djpetti <djpetti@users.noreply.github.com>

* feat: Add missing pathlib pre-commit hook script

Co-Authored-By: djpetti <djpetti@users.noreply.github.com>

* refactor: Migrate core src modules from os.path to pathlib

- Fixed web/app_factory.py, config/llm_config.py, metrics/token_counter.py
- Fixed utilities/es_utils.py, web/routes/benchmark_routes.py
- Fixed web/routes/settings_routes.py, web_search_engines/engines/search_engine_local.py
- Replaced os.path.join() with Path() / syntax
- Replaced os.path.exists() with Path().exists()
- Replaced os.path.basename() with Path().name
- Replaced os.path.dirname() with Path().parent

Part of the migration to modern pathlib API for better cross-platform
compatibility and cleaner code.

Co-Authored-By: djpetti <djpetti@users.noreply.github.com>

* refactor: Migrate from os.path to pathlib in src and tests (issue #640)

Replaced os.path usage with pathlib.Path throughout:
- src/local_deep_research/benchmarks: All os.path.join, exists, dirname, basename, abspath replaced
- tests directory: Complete migration of all test files
- Improved cross-platform compatibility and code readability
- Kept os.path.expandvars in env_settings.py (no pathlib equivalent)

Part of pre-commit hook enforcement for pathlib usage.
Remaining work: examples/ and scripts/ directories.

Co-Authored-By: djpetti

* fix: Complete migration from os.path to pathlib.Path (issue #640)

Completed manual migration of all os.path usage to pathlib.Path across:
- scripts/ directory (3 files)
- examples/ directory (25 files total)
  - examples/benchmarks/ (8 files)
  - examples/optimization/ (16 files)
  - examples/show_env_vars.py
- src/local_deep_research/settings/env_settings.py

Changes made:
- Replaced os.path.join() with Path() / syntax
- Replaced os.path.exists() with Path().exists()
- Replaced os.path.dirname() with Path().parent
- Replaced os.path.basename() with Path().name or Path().stem
- Replaced os.path.abspath() with Path().resolve()
- Replaced os.makedirs() with Path().mkdir(parents=True, exist_ok=True)
- Added pathlib import where needed

Note: Kept os.path.expandvars in env_settings.py as there is no pathlib
equivalent. Added comment explaining this limitation.

This completes the pathlib migration for issue #640.

Co-Authored-By: djpetti

* fix: Allow os.path.expandvars in pathlib pre-commit hook

Updated the check-pathlib-usage.py pre-commit hook to skip checking
os.path.expandvars since it has no pathlib equivalent.

Changes:
- Added exception for expandvars in both visit_Attribute and visit_Call methods
- Added comment in equivalents dictionary noting expandvars is allowed
- This allows env_settings.py to use os.path.expandvars without failing checks

This resolves the pre-commit CI failure while maintaining the pathlib
enforcement for all other os.path methods.

Co-Authored-By: djpetti

---------

Co-authored-by: djpetti
2025-08-17 22:52:35 +02:00

214 lines
6.5 KiB
Python

#!/usr/bin/env python
"""
Optimization Example with Gemini 2.0 Flash via OpenRouter.
This script demonstrates how to run parameter optimization using the Gemini 2.0 Flash
model via OpenRouter.
Usage:
# Install dependencies with PDM
cd /path/to/local-deep-research
pdm install
# Set your OpenRouter API key
export OPENAI_ENDPOINT_API_KEY="your_openrouter_api_key"
# Run the script with PDM
pdm run python examples/optimization/gemini_optimization.py
"""
import argparse
import json
import os
import sys
from datetime import datetime, timezone
from pathlib import Path
from loguru import logger
# Import the optimization functionality
from local_deep_research.benchmarks.optimization import (
optimize_for_quality,
optimize_for_speed,
optimize_parameters,
)
def setup_gemini_config(api_key=None):
    """
    Build the settings needed to reach Gemini 2.0 Flash through OpenRouter.

    The key is taken from the first non-empty source, in this order: the
    ``api_key`` argument, the ``OPENAI_ENDPOINT_API_KEY`` environment
    variable, then the ``LDR_LLM__OPENAI_ENDPOINT_API_KEY`` environment
    variable.

    Args:
        api_key: OpenRouter API key. When empty or None the environment is
            consulted instead.

    Returns:
        A dictionary with the keys ``model_name``, ``provider``,
        ``openai_endpoint_url`` and ``api_key``, or None (after logging an
        error) when no key could be found.
    """
    fallback_env_vars = (
        "OPENAI_ENDPOINT_API_KEY",
        "LDR_LLM__OPENAI_ENDPOINT_API_KEY",
    )

    # Walk the fallbacks only for as long as we still have no usable key.
    resolved_key = api_key
    for env_var in fallback_env_vars:
        if resolved_key:
            break
        resolved_key = os.environ.get(env_var)

    if not resolved_key:
        logger.error("No API key found. Please provide an OpenRouter API key.")
        return None

    config = {}
    config["model_name"] = "google/gemini-2.0-flash-001"  # OpenRouter model id
    config["provider"] = "openai_endpoint"  # OpenRouter is used as the endpoint
    config["openai_endpoint_url"] = "https://openrouter.ai/api/v1"
    config["api_key"] = resolved_key
    return config
def main():
    """
    Command-line entry point: run a small optimization with Gemini 2.0 Flash.

    Steps performed:
      1. Parse ``--api-key``, ``--mode``, ``--trials`` and ``--output-dir``.
      2. Resolve the OpenRouter configuration via ``setup_gemini_config``.
      3. Create the output directory (a timestamped one under
         ``examples/optimization/results`` unless ``--output-dir`` is given).
      4. Export the key, endpoint URL, provider and model through environment
         variables.
      5. Run the optimizer matching ``--mode`` over a deliberately tiny
         parameter space and write ``gemini_optimization_summary.json`` into
         the output directory.

    Returns:
        0 on success; 1 when no API key is available or when the optimization
        or the summary write raises an exception (the exception is logged).

    Raises:
        SystemExit: raised by argparse for invalid command-line arguments,
            including a ``--trials`` value below 1.
    """
    # Parse arguments
    parser = argparse.ArgumentParser(
        description="Run optimization with Gemini 2.0 Flash via OpenRouter"
    )
    parser.add_argument(
        "--api-key",
        help="OpenRouter API key. If not provided, will try to use from environment.",
    )
    parser.add_argument(
        "--mode",
        choices=["balanced", "speed", "quality"],
        default="balanced",
        help="Optimization mode (default: balanced)",
    )
    parser.add_argument(
        "--trials",
        type=int,
        default=3,
        help="Number of optimization trials (default: 3)",
    )
    parser.add_argument(
        "--output-dir",
        default=None,
        help="Directory to save results (default: auto-generated)",
    )
    args = parser.parse_args()

    # Reject a useless trial count before creating directories or touching
    # the environment.
    if args.trials < 1:
        parser.error("--trials must be a positive integer")

    # Set up Gemini configuration
    gemini_config = setup_gemini_config(args.api_key)
    if not gemini_config:
        return 1

    # Timestamp (UTC) used both for the default directory name and the summary
    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
    if args.output_dir:
        output_dir = args.output_dir
    else:
        output_dir = str(
            Path("examples")
            / "optimization"
            / "results"
            / f"gemini_opt_{timestamp}"
        )
    Path(output_dir).mkdir(parents=True, exist_ok=True)

    print(
        f"Starting optimization with Gemini 2.0 Flash - results will be saved to {output_dir}"
    )
    print(
        f"Using model: {gemini_config['model_name']} via {gemini_config['provider']}"
    )

    # Set environment variables to ensure proper API access; both the plain
    # and the LDR_LLM__-prefixed names are populated.
    os.environ["OPENAI_ENDPOINT_API_KEY"] = gemini_config["api_key"]
    os.environ["LDR_LLM__OPENAI_ENDPOINT_API_KEY"] = gemini_config["api_key"]
    os.environ["OPENAI_ENDPOINT_URL"] = gemini_config["openai_endpoint_url"]
    os.environ["LDR_LLM__OPENAI_ENDPOINT_URL"] = gemini_config[
        "openai_endpoint_url"
    ]
    os.environ["LDR_LLM__PROVIDER"] = gemini_config["provider"]
    os.environ["LDR_LLM__MODEL"] = gemini_config["model_name"]

    # Create a very simple parameter space for quick demonstration
    param_space = {
        "iterations": {
            "type": "int",
            "low": 1,
            "high": 2,
            "step": 1,
        },
        "questions_per_iteration": {
            "type": "int",
            "low": 1,
            "high": 2,
            "step": 1,
        },
        "search_strategy": {
            "type": "categorical",
            "choices": ["rapid", "source_based"],  # Limited choices for speed
        },
    }

    # Arguments shared by every optimization mode
    query = "Recent developments in fusion energy research"
    common_kwargs = {
        "query": query,
        "param_space": param_space,
        "n_trials": args.trials,
        "model_name": gemini_config["model_name"],
        "provider": gemini_config["provider"],
        "output_dir": output_dir,
    }

    # Pick the optimizer for the selected mode; only the balanced mode
    # passes explicit metric weights.
    if args.mode == "speed":
        label, optimizer, extra_kwargs = "speed-focused", optimize_for_speed, {}
    elif args.mode == "quality":
        label, optimizer, extra_kwargs = (
            "quality-focused",
            optimize_for_quality,
            {},
        )
    else:  # balanced
        label, optimizer, extra_kwargs = (
            "balanced",
            optimize_parameters,
            {"metric_weights": {"quality": 0.5, "speed": 0.5}},
        )

    try:
        print(f"\n=== Running {label} optimization with Gemini ===")
        best_params, best_score = optimizer(**common_kwargs, **extra_kwargs)

        print(f"Best parameters: {best_params}")
        print(f"Best score: {best_score:.4f}")

        # Save summary to JSON
        summary = {
            "timestamp": timestamp,
            "mode": args.mode,
            "model": gemini_config["model_name"],
            "provider": gemini_config["provider"],
            "best_parameters": best_params,
            "best_score": float(best_score),
        }
        summary_path = Path(output_dir) / "gemini_optimization_summary.json"
        # Explicit encoding so the file does not depend on the locale default.
        with summary_path.open("w", encoding="utf-8") as f:
            json.dump(summary, f, indent=2)

        print(f"\nOptimization complete! Results saved to {output_dir}")
        print(f"Recommended parameters for {args.mode} mode: {best_params}")
    except Exception:
        # Top-level boundary of the script: log with traceback and signal
        # failure through the exit status.
        logger.exception("Error during optimization")
        return 1
    return 0
if __name__ == "__main__":
    # Run the CLI and hand main()'s return value to the shell as the exit status.
    exit_status = main()
    sys.exit(exit_status)