mirror of
https://github.com/LearningCircuit/local-deep-research.git
synced 2026-06-16 03:51:07 +03:00
* Install Ruff and fix all the Ruff errors. * Fix pre-commit failures. * Potential fix for code scanning alert no. 104: Information exposure through an exception Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> * Fix pre-commit failures. --------- Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
127 lines
4.0 KiB
Python
127 lines
4.0 KiB
Python
#!/usr/bin/env python
|
|
"""
|
|
Test script to validate BrowseComp dataset loading and decryption.
|
|
This helps debug issues with the BrowseComp dataset.
|
|
"""
|
|
|
|
import logging
|
|
import sys
|
|
|
|
# Emit every record from DEBUG upward, tagged with its timestamp, logger
# name and severity.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.DEBUG,
)
logger = logging.getLogger()

# Extend the import search path with "." so that local_deep_research can be
# imported from there. This must happen before the import below.
sys.path.append(".")

try:
    from local_deep_research.benchmarks.datasets import decrypt, load_dataset
except ImportError as import_error:
    # The checks below depend on both helpers, so report the problem and
    # stop with a non-zero exit status.
    print(f"Error importing modules: {import_error}")
    sys.exit(1)
|
|
|
|
|
|
def _preview(value, limit=50):
    """Return at most *limit* leading characters of *value* as text.

    ``None`` becomes an empty string and any other non-string value is
    converted with ``str`` first, so a missing or non-text field cannot raise
    while being sliced for display.
    """
    text = "" if value is None else str(value)
    return text[:limit]


def _report_manual_decryption(example):
    """Decrypt the example's problem and answer with its canary and print them.

    Prints a notice instead when the example carries no (truthy) ``canary``.
    Any exception raised while decrypting is reported on stdout and swallowed
    so that the caller can continue with the next example.
    """
    canary = example.get("canary", "")
    if not canary:
        print(" No canary found for manual decryption")
        return

    print("\n Attempting manual decryption...")
    try:
        decrypted_problem = decrypt(example.get("problem", ""), canary)
        decrypted_answer = decrypt(example.get("answer", ""), canary)

        print(
            f" Manually decrypted problem: {_preview(decrypted_problem)}..."
        )
        print(f" Manually decrypted answer: {_preview(decrypted_answer)}...")
    except Exception as e:
        print(f" Manual decryption failed: {e}")


def _report_browsecomp_example(index, example):
    """Print the decryption status and field previews for one example.

    The presence of an ``original_problem`` key is taken as the sign that the
    loader already decrypted the example; otherwise the raw fields are shown
    and a manual decryption is attempted.
    """
    print(f"Example {index + 1}:")
    print(f" ID: {example.get('id', 'unknown')}")

    if "original_problem" in example:
        print(" Decryption successful!")
        original = _preview(example.get("original_problem", ""))
        print(f" Original problem (encrypted): {original}...")
        print(f" Decrypted problem: {_preview(example.get('problem', ''))}...")
        print(
            f" Decrypted answer: {_preview(example.get('correct_answer', ''))}..."
        )
    else:
        print(" Decryption may have failed - no original_problem field")
        print(f" Problem: {_preview(example.get('problem', ''))}...")
        print(f" Answer: {_preview(example.get('answer', ''))}...")
        _report_manual_decryption(example)

    # Blank line between examples.
    print()


def test_browsecomp_decryption():
    """Test decryption of BrowseComp dataset.

    Requests ``num_examples=3`` from ``load_dataset("browsecomp", ...)`` and
    prints, for each returned example, whether it looks decrypted together
    with 50-character previews of its fields. Field values that are ``None``
    or not strings are converted to text before being truncated, so one odd
    value no longer aborts the report for the remaining examples.

    Returns:
        None. Every exception raised while loading or reporting is caught
        and printed on stdout rather than propagated.
    """
    print("\n=== Testing BrowseComp Decryption ===\n")

    try:
        # Load a small number of examples to test
        examples = load_dataset("browsecomp", num_examples=3)

        if not examples:
            print("Error: No examples loaded from dataset")
            return

        print(
            f"Successfully loaded {len(examples)} examples from BrowseComp dataset\n"
        )

        # Check if decryption worked by examining examples
        for i, example in enumerate(examples):
            _report_browsecomp_example(i, example)

    except Exception as e:
        print(f"Error in test: {e}")
|
|
|
|
|
|
def test_simpleqa_loading():
    """Test loading of SimpleQA dataset for comparison.

    Requests ``num_examples=3`` from ``load_dataset("simpleqa", ...)`` and
    prints the ID plus 50-character previews of the problem and answer of
    each returned example. Field values that are ``None`` or not strings are
    converted to text before being truncated, so one odd value no longer
    aborts the report for the remaining examples.

    Returns:
        None. Every exception raised while loading or printing is caught and
        printed on stdout rather than propagated.
    """
    print("\n=== Testing SimpleQA Loading ===\n")

    def clip(value, limit=50):
        # Coerce to text before slicing: None -> "", anything else -> str().
        text = "" if value is None else str(value)
        return text[:limit]

    try:
        # Load a small number of examples to test
        examples = load_dataset("simpleqa", num_examples=3)

        if not examples:
            print("Error: No examples loaded from dataset")
            return

        print(
            f"Successfully loaded {len(examples)} examples from SimpleQA dataset\n"
        )

        # Check examples
        for i, example in enumerate(examples):
            print(f"Example {i + 1}:")
            print(f" ID: {example.get('id', 'unknown')}")
            print(f" Problem: {clip(example.get('problem', ''))}...")
            print(f" Answer: {clip(example.get('answer', ''))}...")
            print()

    except Exception as e:
        print(f"Error in test: {e}")
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the BrowseComp check first, then the SimpleQA check so the two
    # outputs can be compared.
    for run_check in (test_browsecomp_decryption, test_simpleqa_loading):
        run_check()
|