Files
local-deep-research/examples/example_browsecomp.py
Daniel Petti 0a488db081 Install Ruff and fix all the Ruff errors. (#428)
* Install Ruff and fix all the Ruff errors.

* Fix pre-commit failures.

* Potential fix for code scanning alert no. 104: Information exposure through an exception

Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>

* Fix pre-commit failures.

---------

Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
2025-06-05 16:03:01 -04:00

127 lines
4.0 KiB
Python

#!/usr/bin/env python
"""
Test script to validate BrowseComp dataset loading and decryption.
This helps debug issues with the BrowseComp dataset.
"""
import logging
import sys
# Route DEBUG-and-above records through the root logger, stamping each line
# with time, logger name and level.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.DEBUG,
)
logger = logging.getLogger()

# Put the current directory on the import path so local_deep_research can be
# found there; this must happen before the import below.
sys.path.append(".")

try:
    from local_deep_research.benchmarks.datasets import decrypt, load_dataset
except ImportError as import_error:
    # Nothing in this script can run without the dataset helpers.
    print(f"Error importing modules: {import_error}")
    sys.exit(1)
def test_browsecomp_decryption():
    """Load a few BrowseComp examples and report whether they look decrypted.

    Three examples are requested through ``load_dataset("browsecomp", ...)``.
    For every example the ID is printed, followed by one of:

    * the first 50 characters of the ``original_problem``, ``problem`` and
      ``correct_answer`` fields, when an ``original_problem`` key is present;
    * otherwise the first 50 characters of ``problem`` and ``answer``, plus a
      manual ``decrypt`` attempt keyed by the example's ``canary`` value.

    Nothing is returned. Any exception raised along the way is printed
    instead of being propagated.
    """
    print("\n=== Testing BrowseComp Decryption ===\n")

    try:
        # A handful of examples is enough to see whether decryption works.
        loaded = load_dataset("browsecomp", num_examples=3)
        if not loaded:
            print("Error: No examples loaded from dataset")
            return

        print(
            f"Successfully loaded {len(loaded)} examples from BrowseComp dataset\n"
        )

        for position, record in enumerate(loaded, start=1):
            print(f"Example {position}:")
            print(f" ID: {record.get('id', 'unknown')}")

            # The presence of "original_problem" is treated as the marker
            # that the example has already been decrypted.
            if "original_problem" not in record:
                print(
                    " Decryption may have failed - no original_problem field"
                )
                raw_problem_head = record.get("problem", "")[:50]
                print(f" Problem: {raw_problem_head}...")
                raw_answer_head = record.get("answer", "")[:50]
                print(f" Answer: {raw_answer_head}...")

                # Fall back to decrypting by hand with the example's canary.
                canary = record.get("canary", "")
                if not canary:
                    print(" No canary found for manual decryption")
                else:
                    print("\n Attempting manual decryption...")
                    try:
                        cipher_problem = record.get("problem", "")
                        cipher_answer = record.get("answer", "")
                        plain_problem = decrypt(cipher_problem, canary)
                        plain_answer = decrypt(cipher_answer, canary)
                        print(
                            f" Manually decrypted problem: {plain_problem[:50]}..."
                        )
                        print(
                            f" Manually decrypted answer: {plain_answer[:50]}..."
                        )
                    except Exception as decrypt_error:
                        print(f" Manual decryption failed: {decrypt_error}")
            else:
                print(" Decryption successful!")
                encrypted_head = record.get("original_problem", "")[:50]
                print(f" Original problem (encrypted): {encrypted_head}...")
                problem_head = record.get("problem", "")[:50]
                print(f" Decrypted problem: {problem_head}...")
                answer_head = record.get("correct_answer", "")[:50]
                print(f" Decrypted answer: {answer_head}...")

            # Blank line between examples.
            print()
    except Exception as test_error:
        print(f"Error in test: {test_error}")
def test_simpleqa_loading():
    """Load a few SimpleQA examples and print a short preview of each.

    Three examples are requested through ``load_dataset("simpleqa", ...)``;
    the script runs this alongside the BrowseComp check for comparison. For
    every example the ID and the first 50 characters of the ``problem`` and
    ``answer`` fields are printed.

    Nothing is returned. Any exception raised along the way is printed
    instead of being propagated.
    """
    print("\n=== Testing SimpleQA Loading ===\n")

    try:
        # A handful of examples is enough for a sanity check.
        loaded = load_dataset("simpleqa", num_examples=3)
        if not loaded:
            print("Error: No examples loaded from dataset")
            return

        print(
            f"Successfully loaded {len(loaded)} examples from SimpleQA dataset\n"
        )

        for position, record in enumerate(loaded, start=1):
            print(f"Example {position}:")
            print(f" ID: {record.get('id', 'unknown')}")
            problem_head = record.get("problem", "")[:50]
            print(f" Problem: {problem_head}...")
            answer_head = record.get("answer", "")[:50]
            print(f" Answer: {answer_head}...")
            # Blank line between examples.
            print()
    except Exception as test_error:
        print(f"Error in test: {test_error}")
if __name__ == "__main__":
    # Run the BrowseComp check first, then SimpleQA, so the two outputs can
    # be compared.
    for run_check in (test_browsecomp_decryption, test_simpleqa_loading):
        run_check()