mirror of
https://github.com/LearningCircuit/local-deep-research.git
synced 2026-06-15 19:46:56 +03:00
* feat: Add pre-commit hook to enforce pathlib usage (issue #640) - Created check-pathlib-usage.py pre-commit hook using AST parsing - Detects os.path usage and suggests pathlib alternatives - Fixed os.path.normpath usage in auth/routes.py to use PurePosixPath - Added hook configuration to .pre-commit-config.yaml The hook provides helpful suggestions for replacing os.path calls with their pathlib equivalents for better cross-platform compatibility. Co-Authored-By: djpetti <djpetti@users.noreply.github.com> * feat: Add missing pathlib pre-commit hook script Co-Authored-By: djpetti <djpetti@users.noreply.github.com> * refactor: Migrate core src modules from os.path to pathlib - Fixed web/app_factory.py, config/llm_config.py, metrics/token_counter.py - Fixed utilities/es_utils.py, web/routes/benchmark_routes.py - Fixed web/routes/settings_routes.py, web_search_engines/engines/search_engine_local.py - Replaced os.path.join() with Path() / syntax - Replaced os.path.exists() with Path().exists() - Replaced os.path.basename() with Path().name - Replaced os.path.dirname() with Path().parent Part of the migration to modern pathlib API for better cross-platform compatibility and cleaner code. Co-Authored-By: djpetti <djpetti@users.noreply.github.com> * refactor: Migrate from os.path to pathlib in src and tests (issue #640) Replaced os.path usage with pathlib.Path throughout: - src/local_deep_research/benchmarks: All os.path.join, exists, dirname, basename, abspath replaced - tests directory: Complete migration of all test files - Improved cross-platform compatibility and code readability - Kept os.path.expandvars in env_settings.py (no pathlib equivalent) Part of pre-commit hook enforcement for pathlib usage. Remaining work: examples/ and scripts/ directories. 
Co-Authored-By: djpetti * fix: Complete migration from os.path to pathlib.Path (issue #640) Completed manual migration of all os.path usage to pathlib.Path across: - scripts/ directory (3 files) - examples/ directory (25 files total) - examples/benchmarks/ (8 files) - examples/optimization/ (16 files) - examples/show_env_vars.py - src/local_deep_research/settings/env_settings.py Changes made: - Replaced os.path.join() with Path() / syntax - Replaced os.path.exists() with Path().exists() - Replaced os.path.dirname() with Path().parent - Replaced os.path.basename() with Path().name or Path().stem - Replaced os.path.abspath() with Path().resolve() - Replaced os.makedirs() with Path().mkdir(parents=True, exist_ok=True) - Added pathlib import where needed Note: Kept os.path.expandvars in env_settings.py as there is no pathlib equivalent. Added comment explaining this limitation. This completes the pathlib migration for issue #640. Co-Authored-By: djpetti * fix: Allow os.path.expandvars in pathlib pre-commit hook Updated the check-pathlib-usage.py pre-commit hook to skip checking os.path.expandvars since it has no pathlib equivalent. Changes: - Added exception for expandvars in both visit_Attribute and visit_Call methods - Added comment in equivalents dictionary noting expandvars is allowed - This allows env_settings.py to use os.path.expandvars without failing checks This resolves the pre-commit CI failure while maintaining the pathlib enforcement for all other os.path methods. Co-Authored-By: djpetti --------- Co-authored-by: djpetti
117 lines
3.3 KiB
Python
117 lines
3.3 KiB
Python
#!/usr/bin/env python
|
|
"""
|
|
SimpleQA Benchmark Runner for Local Deep Research.
|
|
|
|
This script provides a convenient way to run the SimpleQA benchmark.
|
|
|
|
Usage:
|
|
# Install dependencies with PDM
|
|
cd /path/to/local-deep-research
|
|
pdm install
|
|
|
|
# Run the script with PDM
|
|
pdm run python examples/benchmarks/run_simpleqa.py --help
|
|
"""
|
|
|
|
import argparse
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Import the benchmark functionality
|
|
from local_deep_research.benchmarks.benchmark_functions import evaluate_simpleqa
|
|
|
|
|
|
def main():
    """Run the SimpleQA benchmark with the parameters given on the command line.

    Builds the argument parser, runs ``evaluate_simpleqa`` with the parsed
    options and prints a short summary (accuracy, number of examples and the
    report path) taken from the returned results mapping.

    Returns:
        int: Always ``0``; the caller passes it to ``sys.exit`` as the
        process exit status.
    """
    parser = argparse.ArgumentParser(description="Run SimpleQA benchmark")
    parser.add_argument(
        "--examples", type=int, default=10, help="Number of examples to run"
    )
    parser.add_argument(
        "--iterations", type=int, default=3, help="Number of search iterations"
    )
    parser.add_argument(
        "--questions", type=int, default=3, help="Questions per iteration"
    )
    parser.add_argument(
        "--search-tool", type=str, default="searxng", help="Search tool to use"
    )
    parser.add_argument(
        "--output-dir",
        type=str,
        default=str(Path("examples") / "benchmarks" / "results" / "simpleqa"),
        help="Output directory",
    )
    parser.add_argument(
        "--no-eval", action="store_true", help="Skip evaluation"
    )

    # Optional evaluation parameters
    parser.add_argument(
        "--human-eval", action="store_true", help="Use human evaluation"
    )
    parser.add_argument(
        "--eval-model", type=str, help="Model to use for evaluation"
    )
    parser.add_argument(
        "--eval-provider", type=str, help="Provider to use for evaluation"
    )

    # Add model configuration options
    parser.add_argument(
        "--search-model", type=str, help="Model to use for the search system"
    )
    parser.add_argument(
        "--search-provider",
        type=str,
        help="Provider to use for the search system",
    )
    parser.add_argument(
        "--endpoint-url",
        type=str,
        help="Endpoint URL for OpenRouter or other API services",
    )
    parser.add_argument(
        "--search-strategy",
        type=str,
        default="source_based",
        choices=[
            "source_based",
            "standard",
            "rapid",
            "parallel",
            "iterdrag",
            "modular",
        ],
        help="Search strategy to use (default: source_based)",
    )
    parser.add_argument("--api-key", type=str, help="API key for LLM provider")

    args = parser.parse_args()

    # These options were parsed but never used; tell the user instead of
    # silently dropping them.
    # NOTE(review): no matching evaluate_simpleqa parameter is known for
    # either option - wire them through once the intended target is confirmed.
    if args.no_eval:
        print(
            "Warning: --no-eval is currently ignored by this script.",
            file=sys.stderr,
        )
    if args.api_key:
        print(
            "Warning: --api-key is currently ignored by this script.",
            file=sys.stderr,
        )

    print(f"Starting SimpleQA benchmark with {args.examples} examples...")

    # Run the benchmark
    results = evaluate_simpleqa(
        num_examples=args.examples,
        search_iterations=args.iterations,
        questions_per_iteration=args.questions,
        search_tool=args.search_tool,
        human_evaluation=args.human_eval,
        evaluation_model=args.eval_model,
        evaluation_provider=args.eval_provider,
        output_dir=args.output_dir,
        # Forward the search-system options so they actually take effect.
        # NOTE(review): assumes evaluate_simpleqa accepts these keyword
        # arguments - confirm against benchmark_functions.
        search_model=args.search_model,
        search_provider=args.search_provider,
        endpoint_url=args.endpoint_url,
        search_strategy=args.search_strategy,
    )

    # Print summary; missing keys fall back to neutral defaults.
    print("\nSimpleQA Benchmark Results:")
    print(f" Accuracy: {results.get('accuracy', 0):.3f}")
    print(f" Total examples: {results.get('total_examples', 0)}")
    print(f" Report saved to: {results.get('report_path', '')}")

    return 0
|
|
|
|
|
|
# Script entry point: run the benchmark only when executed directly (not on
# import) and use main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|