mirror of
https://github.com/LearningCircuit/local-deep-research.git
synced 2026-06-16 20:10:39 +03:00
Cleanup follow-up to #3797. The check-open-encoding hook was originally scoped with `exclude: ^(tests/|examples/|scripts/)` because those directories had ~45 pre-existing bare open() calls, and addressing them was out of scope for the core Windows bug fix. This commit: (1) adds encoding="utf-8" to 45 read/write call sites under examples/ and scripts/ — JSON benchmark results, config-doc generators, workflow status pages, and the datetime-timezone pre-commit hook; and (2) narrows the hook exclude to ^tests/ only, so future regressions in examples/ and scripts/ are blocked at commit time. Windows users running the benchmark scripts and config-doc generator would previously hit silent failures or UnicodeDecodeErrors on non-ASCII content under cp1252. The package itself was already protected by #3797.
216 lines
6.5 KiB
Python
216 lines
6.5 KiB
Python
#!/usr/bin/env python
|
|
"""
|
|
Optimization Example with Gemini 2.0 Flash via OpenRouter.
|
|
|
|
This script demonstrates how to run parameter optimization using the Gemini 2.0 Flash
|
|
model via OpenRouter.
|
|
|
|
Usage:
|
|
# Install dependencies with PDM
|
|
cd /path/to/local-deep-research
|
|
pdm install
|
|
|
|
# Set your OpenRouter API key
|
|
export OPENAI_ENDPOINT_API_KEY="your_openrouter_api_key"
|
|
|
|
# Run the script with PDM
|
|
pdm run python examples/optimization/gemini_optimization.py
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import sys
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
|
|
from loguru import logger
|
|
|
|
# Import the optimization functionality
|
|
from local_deep_research.benchmarks.optimization import (
|
|
optimize_for_quality,
|
|
optimize_for_speed,
|
|
optimize_parameters,
|
|
)
|
|
|
|
|
|
def setup_gemini_config(api_key=None):
    """
    Create a configuration for using Gemini via OpenRouter.

    The key is taken from the first usable source, in this order: the
    ``api_key`` argument, the ``OPENAI_ENDPOINT_API_KEY`` environment
    variable, then ``LDR_LLM__OPENAI_ENDPOINT_API_KEY``. String candidates
    are stripped of surrounding whitespace before being checked, so a
    whitespace-only value counts as missing and a padded key (for example
    one carrying a trailing newline or carriage return) is cleaned up
    instead of being passed on verbatim.

    Args:
        api_key: OpenRouter API key. If None or empty, the environment
            variables above are consulted.

    Returns:
        Dictionary with the keys ``model_name``, ``provider``,
        ``openai_endpoint_url`` and ``api_key``, or None (after logging an
        error) when no usable key is found.
    """
    # Precedence: explicit argument first, then the two environment variables.
    candidates = (
        api_key,
        os.environ.get("OPENAI_ENDPOINT_API_KEY"),
        os.environ.get("LDR_LLM__OPENAI_ENDPOINT_API_KEY"),
    )

    for candidate in candidates:
        # Only strings are normalised; any other value is passed through
        # unchanged and judged by plain truthiness.
        key = candidate.strip() if isinstance(candidate, str) else candidate
        if key:
            return {
                "model_name": "google/gemini-2.0-flash-001",  # OpenRouter format for Gemini
                "provider": "openai_endpoint",  # Use OpenRouter as endpoint
                "openai_endpoint_url": "https://openrouter.ai/api/v1",
                "api_key": key,
            }

    logger.error("No API key found. Please provide an OpenRouter API key.")
    return None
|
|
|
|
|
|
def _positive_int(value):
    """Argparse ``type`` callable: parse ``value`` as an integer >= 1."""
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"invalid int value: {value!r}"
        ) from None
    if number < 1:
        raise argparse.ArgumentTypeError(
            f"must be a positive integer, got {number}"
        )
    return number


def _parse_args():
    """Build the command-line parser for this example and parse sys.argv."""
    parser = argparse.ArgumentParser(
        description="Run optimization with Gemini 2.0 Flash via OpenRouter"
    )
    parser.add_argument(
        "--api-key",
        help="OpenRouter API key. If not provided, will try to use from environment.",
    )
    parser.add_argument(
        "--mode",
        choices=["balanced", "speed", "quality"],
        default="balanced",
        help="Optimization mode (default: balanced)",
    )
    parser.add_argument(
        "--trials",
        # Reject 0 / negative counts up front instead of letting them fail
        # somewhere inside the optimization run.
        type=_positive_int,
        default=3,
        help="Number of optimization trials (default: 3)",
    )
    parser.add_argument(
        "--output-dir",
        default=None,
        help="Directory to save results (default: auto-generated)",
    )
    return parser.parse_args()


def _export_llm_environment(gemini_config):
    """Copy the API key, endpoint URL, provider and model into os.environ."""
    # Set environment variables to ensure proper API access. Both the plain
    # and the LDR_LLM__-prefixed names are written.
    os.environ["OPENAI_ENDPOINT_API_KEY"] = gemini_config["api_key"]
    os.environ["LDR_LLM__OPENAI_ENDPOINT_API_KEY"] = gemini_config["api_key"]
    os.environ["OPENAI_ENDPOINT_URL"] = gemini_config["openai_endpoint_url"]
    os.environ["LDR_LLM__OPENAI_ENDPOINT_URL"] = gemini_config[
        "openai_endpoint_url"
    ]
    os.environ["LDR_LLM__PROVIDER"] = gemini_config["provider"]
    os.environ["LDR_LLM__MODEL"] = gemini_config["model_name"]


def _run_optimization(
    mode, *, query, param_space, n_trials, gemini_config, output_dir
):
    """Announce and run the optimizer for ``mode``; return its result pair."""
    # Keyword arguments common to all three optimizer entry points.
    shared_kwargs = {
        "query": query,
        "param_space": param_space,
        "n_trials": n_trials,
        "model_name": gemini_config["model_name"],
        "provider": gemini_config["provider"],
        "output_dir": output_dir,
    }

    if mode == "speed":
        print("\n=== Running speed-focused optimization with Gemini ===")
        return optimize_for_speed(**shared_kwargs)

    if mode == "quality":
        print("\n=== Running quality-focused optimization with Gemini ===")
        return optimize_for_quality(**shared_kwargs)

    # balanced: weight quality and speed equally.
    print("\n=== Running balanced optimization with Gemini ===")
    return optimize_parameters(
        **shared_kwargs,
        metric_weights={"quality": 0.5, "speed": 0.5},
    )


def _write_summary(output_dir, summary):
    """Write ``summary`` as indented JSON into ``output_dir``."""
    with open(
        Path(output_dir) / "gemini_optimization_summary.json",
        "w",
        encoding="utf-8",
    ) as f:
        json.dump(summary, f, indent=2)


def main():
    """
    Run the Gemini optimization example from the command line.

    Parses the CLI options, resolves the OpenRouter API key, prepares the
    output directory, exports the LLM settings to the environment, runs the
    optimizer selected by ``--mode`` over a deliberately small parameter
    space, and writes a JSON summary of the best result.

    Returns:
        0 on success; 1 when no API key is available or when the
        optimization or the summary write raises an exception (the
        exception is logged, not propagated).
    """
    args = _parse_args()

    # Set up Gemini configuration
    gemini_config = setup_gemini_config(args.api_key)
    if not gemini_config:
        return 1

    # Create timestamp for unique output directory
    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
    if args.output_dir:
        output_dir = args.output_dir
    else:
        output_dir = str(
            Path("examples")
            / "optimization"
            / "results"
            / f"gemini_opt_{timestamp}"
        )
    Path(output_dir).mkdir(parents=True, exist_ok=True)

    print(
        f"Starting optimization with Gemini 2.0 Flash - results will be saved to {output_dir}"
    )
    print(
        f"Using model: {gemini_config['model_name']} via {gemini_config['provider']}"
    )

    _export_llm_environment(gemini_config)

    # Create a very simple parameter space for quick demonstration
    param_space = {
        "iterations": {
            "type": "int",
            "low": 1,
            "high": 2,
            "step": 1,
        },
        "questions_per_iteration": {
            "type": "int",
            "low": 1,
            "high": 2,
            "step": 1,
        },
        "search_strategy": {
            "type": "categorical",
            "choices": ["rapid", "source_based"],  # Limited choices for speed
        },
    }

    query = "Recent developments in fusion energy research"

    # Top-level boundary of the script: any failure while optimizing or
    # saving is logged with its traceback and turned into exit status 1.
    try:
        best_params, best_score = _run_optimization(
            args.mode,
            query=query,
            param_space=param_space,
            n_trials=args.trials,
            gemini_config=gemini_config,
            output_dir=output_dir,
        )

        print(f"Best parameters: {best_params}")
        print(f"Best score: {best_score:.4f}")

        # Save summary to JSON
        summary = {
            "timestamp": timestamp,
            "mode": args.mode,
            "model": gemini_config["model_name"],
            "provider": gemini_config["provider"],
            "best_parameters": best_params,
            "best_score": float(best_score),
        }
        _write_summary(output_dir, summary)

        print(f"\nOptimization complete! Results saved to {output_dir}")
        print(f"Recommended parameters for {args.mode} mode: {best_params}")

    except Exception:
        logger.exception("Error during optimization")
        return 1

    return 0
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the example and hand main()'s return value to the interpreter as
    # the process exit status.
    exit_code = main()
    sys.exit(exit_code)
|