mirror of
https://github.com/LearningCircuit/local-deep-research.git
synced 2026-06-15 19:46:56 +03:00
* feat: Add pre-commit hook to enforce pathlib usage (issue #640) - Created check-pathlib-usage.py pre-commit hook using AST parsing - Detects os.path usage and suggests pathlib alternatives - Fixed os.path.normpath usage in auth/routes.py to use PurePosixPath - Added hook configuration to .pre-commit-config.yaml The hook provides helpful suggestions for replacing os.path calls with their pathlib equivalents for better cross-platform compatibility. Co-Authored-By: djpetti <djpetti@users.noreply.github.com> * feat: Add missing pathlib pre-commit hook script Co-Authored-By: djpetti <djpetti@users.noreply.github.com> * refactor: Migrate core src modules from os.path to pathlib - Fixed web/app_factory.py, config/llm_config.py, metrics/token_counter.py - Fixed utilities/es_utils.py, web/routes/benchmark_routes.py - Fixed web/routes/settings_routes.py, web_search_engines/engines/search_engine_local.py - Replaced os.path.join() with Path() / syntax - Replaced os.path.exists() with Path().exists() - Replaced os.path.basename() with Path().name - Replaced os.path.dirname() with Path().parent Part of the migration to modern pathlib API for better cross-platform compatibility and cleaner code. Co-Authored-By: djpetti <djpetti@users.noreply.github.com> * refactor: Migrate from os.path to pathlib in src and tests (issue #640) Replaced os.path usage with pathlib.Path throughout: - src/local_deep_research/benchmarks: All os.path.join, exists, dirname, basename, abspath replaced - tests directory: Complete migration of all test files - Improved cross-platform compatibility and code readability - Kept os.path.expandvars in env_settings.py (no pathlib equivalent) Part of pre-commit hook enforcement for pathlib usage. Remaining work: examples/ and scripts/ directories. 
Co-Authored-By: djpetti * fix: Complete migration from os.path to pathlib.Path (issue #640) Completed manual migration of all os.path usage to pathlib.Path across: - scripts/ directory (3 files) - examples/ directory (25 files total) - examples/benchmarks/ (8 files) - examples/optimization/ (16 files) - examples/show_env_vars.py - src/local_deep_research/settings/env_settings.py Changes made: - Replaced os.path.join() with Path() / syntax - Replaced os.path.exists() with Path().exists() - Replaced os.path.dirname() with Path().parent - Replaced os.path.basename() with Path().name or Path().stem - Replaced os.path.abspath() with Path().resolve() - Replaced os.makedirs() with Path().mkdir(parents=True, exist_ok=True) - Added pathlib import where needed Note: Kept os.path.expandvars in env_settings.py as there is no pathlib equivalent. Added comment explaining this limitation. This completes the pathlib migration for issue #640. Co-Authored-By: djpetti * fix: Allow os.path.expandvars in pathlib pre-commit hook Updated the check-pathlib-usage.py pre-commit hook to skip checking os.path.expandvars since it has no pathlib equivalent. Changes: - Added exception for expandvars in both visit_Attribute and visit_Call methods - Added comment in equivalents dictionary noting expandvars is allowed - This allows env_settings.py to use os.path.expandvars without failing checks This resolves the pre-commit CI failure while maintaining the pathlib enforcement for all other os.path methods. Co-Authored-By: djpetti --------- Co-authored-by: djpetti
197 lines
5.9 KiB
Python
197 lines
5.9 KiB
Python
#!/usr/bin/env python
|
|
"""
Parameter Optimization Runner for Local Deep Research.

This script provides a convenient way to run hyperparameter optimization.

Usage:
    # Install dependencies with PDM
    cd /path/to/local-deep-research
    pdm install

    # Run the script with PDM
    pdm run python examples/optimization/run_optimization.py --help
"""
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import sys
|
|
from datetime import datetime, UTC
|
|
from pathlib import Path
|
|
|
|
# Import the optimization functionality
|
|
from local_deep_research.benchmarks.optimization import (
|
|
optimize_for_efficiency,
|
|
optimize_for_quality,
|
|
optimize_for_speed,
|
|
optimize_parameters,
|
|
)
|
|
|
|
|
|
def main():
|
|
"""Run parameter optimization with command-line arguments."""
|
|
parser = argparse.ArgumentParser(
|
|
description="Run parameter optimization for Local Deep Research"
|
|
)
|
|
parser.add_argument("query", help="Research query to optimize for")
|
|
parser.add_argument(
|
|
"--output-dir",
|
|
default=str(Path("examples") / "optimization" / "results"),
|
|
help="Directory to save results",
|
|
)
|
|
parser.add_argument(
|
|
"--search-tool", default="searxng", help="Search tool to use"
|
|
)
|
|
|
|
# LLM configuration options
|
|
parser.add_argument(
|
|
"--model",
|
|
help="Model name for the LLM (e.g., 'claude-3-sonnet-20240229')",
|
|
)
|
|
parser.add_argument(
|
|
"--provider",
|
|
help="Provider for the LLM (e.g., 'anthropic', 'openai', 'openai_endpoint')",
|
|
)
|
|
parser.add_argument(
|
|
"--endpoint-url",
|
|
help="Custom endpoint URL (e.g., 'https://openrouter.ai/api/v1')",
|
|
)
|
|
parser.add_argument("--api-key", help="API key for the LLM provider")
|
|
parser.add_argument(
|
|
"--temperature",
|
|
type=float,
|
|
default=0.7,
|
|
help="Temperature for the LLM (default: 0.7)",
|
|
)
|
|
|
|
parser.add_argument(
|
|
"--trials",
|
|
type=int,
|
|
default=30,
|
|
help="Number of parameter combinations to try",
|
|
)
|
|
parser.add_argument(
|
|
"--mode",
|
|
choices=["balanced", "speed", "quality", "efficiency"],
|
|
default="balanced",
|
|
help="Optimization mode",
|
|
)
|
|
parser.add_argument(
|
|
"--weights",
|
|
help='Custom weights as JSON string, e.g., \'{"quality": 0.7, "speed": 0.3}\'',
|
|
)
|
|
|
|
args = parser.parse_args()
|
|
|
|
# Create timestamp for unique output directory
|
|
timestamp = datetime.now(UTC).strftime("%Y%m%d_%H%M%S")
|
|
output_dir = str(Path(args.output_dir) / f"opt_{timestamp}")
|
|
os.makedirs(output_dir, exist_ok=True)
|
|
|
|
print(
|
|
f"Starting optimization ({args.mode} mode) - results will be saved to {output_dir}"
|
|
)
|
|
|
|
# Parse custom weights if provided
|
|
custom_weights = None
|
|
if args.weights:
|
|
try:
|
|
custom_weights = json.loads(args.weights)
|
|
except json.JSONDecodeError:
|
|
print("Error parsing weights JSON. Using default weights.")
|
|
|
|
# Set environment variables for the API key and endpoint URL if provided
|
|
if args.api_key:
|
|
os.environ["OPENAI_ENDPOINT_API_KEY"] = args.api_key
|
|
os.environ["LDR_LLM__OPENAI_ENDPOINT_API_KEY"] = args.api_key
|
|
|
|
if args.endpoint_url:
|
|
os.environ["OPENAI_ENDPOINT_URL"] = args.endpoint_url
|
|
os.environ["LDR_LLM__OPENAI_ENDPOINT_URL"] = args.endpoint_url
|
|
|
|
if args.model:
|
|
os.environ["LDR_LLM__MODEL"] = args.model
|
|
|
|
if args.provider:
|
|
os.environ["LDR_LLM__PROVIDER"] = args.provider
|
|
|
|
# Run optimization based on mode
|
|
if args.mode == "speed":
|
|
best_params, best_score = optimize_for_speed(
|
|
query=args.query,
|
|
search_tool=args.search_tool,
|
|
n_trials=args.trials,
|
|
model_name=args.model,
|
|
provider=args.provider,
|
|
openai_endpoint_url=args.endpoint_url,
|
|
temperature=args.temperature,
|
|
api_key=args.api_key,
|
|
output_dir=output_dir,
|
|
)
|
|
elif args.mode == "quality":
|
|
best_params, best_score = optimize_for_quality(
|
|
query=args.query,
|
|
search_tool=args.search_tool,
|
|
n_trials=args.trials,
|
|
model_name=args.model,
|
|
provider=args.provider,
|
|
openai_endpoint_url=args.endpoint_url,
|
|
temperature=args.temperature,
|
|
api_key=args.api_key,
|
|
output_dir=output_dir,
|
|
)
|
|
elif args.mode == "efficiency":
|
|
best_params, best_score = optimize_for_efficiency(
|
|
query=args.query,
|
|
search_tool=args.search_tool,
|
|
n_trials=args.trials,
|
|
model_name=args.model,
|
|
provider=args.provider,
|
|
openai_endpoint_url=args.endpoint_url,
|
|
temperature=args.temperature,
|
|
api_key=args.api_key,
|
|
output_dir=output_dir,
|
|
)
|
|
else: # balanced
|
|
best_params, best_score = optimize_parameters(
|
|
query=args.query,
|
|
search_tool=args.search_tool,
|
|
n_trials=args.trials,
|
|
model_name=args.model,
|
|
provider=args.provider,
|
|
openai_endpoint_url=args.endpoint_url,
|
|
temperature=args.temperature,
|
|
api_key=args.api_key,
|
|
output_dir=output_dir,
|
|
metric_weights=custom_weights,
|
|
)
|
|
|
|
print(f"\nOptimization complete! Results saved to {output_dir}")
|
|
print(f"Best parameters: {best_params}")
|
|
print(f"Best score: {best_score:.4f}")
|
|
|
|
# Save summary to a JSON file
|
|
summary = {
|
|
"timestamp": timestamp,
|
|
"query": args.query,
|
|
"mode": args.mode,
|
|
"trials": args.trials,
|
|
"search_tool": args.search_tool,
|
|
"model": args.model,
|
|
"provider": args.provider,
|
|
"temperature": args.temperature,
|
|
"best_parameters": best_params,
|
|
"best_score": best_score,
|
|
"custom_weights": custom_weights,
|
|
}
|
|
|
|
with open(Path(output_dir) / "optimization_summary.json", "w") as f:
|
|
json.dump(summary, f, indent=2)
|
|
|
|
return 0
|
|
|
|
|
|
# Script entry point: run the CLI and hand main()'s return value to the
# interpreter as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)
|