Files
local-deep-research/examples/example_browsecomp.py
LearningCircuit c8dc94feb1 fix: Major pre-commit hook compliance improvements
- Replace standard logging with loguru across codebase (50+ files)
- Fix logger.exception usage in exception handlers (100+ fixes)
- Update environment variable access to use SettingsManager
- Improve pre-commit hooks to handle legitimate use cases
- Fix formatting and linting issues

Reduced custom code check violations from 268 to 38 (86% reduction)
Reduced env var check violations from 73 to 57 (22% reduction)
2025-07-16 00:33:02 +02:00

126 lines
4.0 KiB
Python

#!/usr/bin/env python
"""
Test script to validate BrowseComp dataset loading and decryption.
This helps debug issues with the BrowseComp dataset.
"""
import sys
from loguru import logger
# Reconfigure loguru for this script: clear the existing handlers and
# log to stderr at DEBUG level.
logger.remove()
logger.add(sys.stderr, level="DEBUG")

# Put the current working directory on the import path so that
# local_deep_research can be imported.
sys.path.append(".")

# The dataset helpers are required by everything below; without them
# the script reports the import problem and exits with status 1.
try:
    from local_deep_research.benchmarks.datasets import decrypt, load_dataset
except ImportError as import_error:
    print(f"Error importing modules: {import_error}")
    sys.exit(1)
def test_browsecomp_decryption():
    """Load a few BrowseComp examples and report whether they look decrypted.

    Requests three examples via ``load_dataset("browsecomp", num_examples=3)``
    and prints a short report for each one to stdout:

    * When the example contains an ``original_problem`` key, decryption is
      reported as successful and the first 50 characters of
      ``original_problem``, ``problem`` and ``correct_answer`` are shown.
    * Otherwise the ``problem`` and ``answer`` fields are shown as-is and,
      if the example carries a non-empty ``canary``, ``decrypt`` is applied
      to both fields by hand and the results are previewed.

    Any ``Exception`` raised along the way is caught and reported on stdout
    rather than propagated. Returns ``None``.
    """

    def preview(record, field):
        # First 50 characters of a field; a missing field previews as "".
        return record.get(field, "")[:50]

    def attempt_manual_decryption(record):
        # Fallback path: decrypt problem/answer ourselves using the canary.
        canary = record.get("canary", "")
        if not canary:
            print(" No canary found for manual decryption")
            return
        print("\n Attempting manual decryption...")
        try:
            # Fetch both fields first, then decrypt both, then print, so
            # that nothing is printed if either decryption fails.
            raw_fields = [record.get(key, "") for key in ("problem", "answer")]
            plain_problem, plain_answer = [
                decrypt(raw, canary) for raw in raw_fields
            ]
            print(f" Manually decrypted problem: {plain_problem[:50]}...")
            print(f" Manually decrypted answer: {plain_answer[:50]}...")
        except Exception as decrypt_error:
            print(f" Manual decryption failed: {decrypt_error}")

    print("\n=== Testing BrowseComp Decryption ===\n")
    try:
        # A handful of examples is enough for a smoke test.
        samples = load_dataset("browsecomp", num_examples=3)
        if not samples:
            print("Error: No examples loaded from dataset")
            return

        loaded_message = (
            f"Successfully loaded {len(samples)} examples from BrowseComp dataset\n"
        )
        print(loaded_message)

        for number, sample in enumerate(samples, start=1):
            print(f"Example {number}:")
            print(f" ID: {sample.get('id', 'unknown')}")

            # The presence of "original_problem" is what this script uses
            # as the sign that the example was decrypted.
            if "original_problem" not in sample:
                print(" Decryption may have failed - no original_problem field")
                print(f" Problem: {preview(sample, 'problem')}...")
                print(f" Answer: {preview(sample, 'answer')}...")
                attempt_manual_decryption(sample)
            else:
                print(" Decryption successful!")
                print(
                    f" Original problem (encrypted): {preview(sample, 'original_problem')}..."
                )
                print(f" Decrypted problem: {preview(sample, 'problem')}...")
                print(
                    f" Decrypted answer: {preview(sample, 'correct_answer')}..."
                )

            # Blank line between examples.
            print()
    except Exception as error:
        print(f"Error in test: {error}")
def test_simpleqa_loading():
    """Load a few SimpleQA examples and print them as a point of comparison.

    Requests three examples via ``load_dataset("simpleqa", num_examples=3)``
    and prints, for each one, its ``id`` (or ``unknown``) and the first 50
    characters of its ``problem`` and ``answer`` fields.

    Any ``Exception`` raised along the way is caught and reported on stdout
    rather than propagated. Returns ``None``.
    """

    def preview(record, field):
        # First 50 characters of a field; a missing field previews as "".
        return record.get(field, "")[:50]

    print("\n=== Testing SimpleQA Loading ===\n")
    try:
        # A handful of examples is enough for a smoke test.
        samples = load_dataset("simpleqa", num_examples=3)
        if not samples:
            print("Error: No examples loaded from dataset")
            return

        loaded_message = (
            f"Successfully loaded {len(samples)} examples from SimpleQA dataset\n"
        )
        print(loaded_message)

        for number, sample in enumerate(samples, start=1):
            print(f"Example {number}:")
            print(f" ID: {sample.get('id', 'unknown')}")
            print(f" Problem: {preview(sample, 'problem')}...")
            print(f" Answer: {preview(sample, 'answer')}...")
            # Blank line between examples.
            print()
    except Exception as error:
        print(f"Error in test: {error}")
if __name__ == "__main__":
    # Run the BrowseComp check first, then SimpleQA for comparison.
    for run_check in (test_browsecomp_decryption, test_simpleqa_loading):
        run_check()