Files
local-deep-research/examples/example_browsecomp.py
LearningCircuit 7f62bf6f6f docs(examples): pass diagnose=False to logger.add in example scripts (#4185) (#4401)
The four example scripts that re-configure loguru
(examples/example_browsecomp.py and three examples/api_usage/programmatic/
snippets) called logger.add(sys.stderr, ...) without diagnose=False.
loguru defaults to diagnose=True, which renders repr() of every local in
every traceback frame on exception.

Users copy these snippets into their own scripts as a starting point;
leaving the default on propagated the same credential-in-traceback leak
that #4384 (config_logger) and the MCP subprocess fix close on the
application side. Pinning diagnose=False here keeps the templates safe
to copy.

No application-code changes; examples-only.
2026-06-02 22:33:21 +02:00

130 lines
4.3 KiB
Python

#!/usr/bin/env python
"""
Test script to validate BrowseComp dataset loading and decryption.
This helps debug issues with the BrowseComp dataset.
"""
import sys
from loguru import logger
# Drop loguru's pre-installed sink so the only output goes through the
# DEBUG-level stderr sink configured below.
logger.remove()
# loguru's default is diagnose=True, which prints repr() of every local
# variable in every traceback frame when an exception is logged. This
# snippet gets copied into other scripts, so the default is switched off
# here to avoid spreading the credential-in-traceback leak (#4185).
logger.add(sys.stderr, diagnose=False, level="DEBUG")

# Put the current working directory on the import path so that
# local_deep_research can be imported.
sys.path.append(".")

try:
    from local_deep_research.benchmarks.datasets import decrypt, load_dataset
except ImportError as e:
    # Nothing below can run without the dataset helpers: report and stop.
    print(f"Error importing modules: {e}")
    raise SystemExit(1)
def test_browsecomp_decryption():
    """Load a few BrowseComp examples and report whether they look decrypted.

    Requests three examples via ``load_dataset("browsecomp", num_examples=3)``
    and prints, for each one, its ID and 50-character previews of the
    problem and answer fields. An example that carries an
    ``"original_problem"`` key is reported as successfully decrypted.
    Otherwise the raw ``problem``/``answer`` are shown and, if the example
    has a non-empty ``"canary"`` value, a manual ``decrypt(...)`` of both
    fields is attempted and previewed.

    Returns:
        None. All results are printed to stdout.

    Any exception raised while loading or printing is caught and reported
    as ``Error in test: ...`` rather than propagated, so the caller can go
    on to run further checks.
    """
    print("\n=== Testing BrowseComp Decryption ===\n")
    try:
        # Load a small number of examples to test
        examples = load_dataset("browsecomp", num_examples=3)
        if not examples:
            print("Error: No examples loaded from dataset")
            return
        print(
            f"Successfully loaded {len(examples)} examples from BrowseComp dataset\n"
        )
        # Check if decryption worked by examining examples
        for i, example in enumerate(examples):
            print(f"Example {i + 1}:")
            print(f" ID: {example.get('id', 'unknown')}")
            # Field values are passed through str() before slicing: a
            # non-string value (e.g. None or NaN from a malformed row) would
            # otherwise raise TypeError and abort the remaining examples.
            if "original_problem" in example:
                print(" Decryption successful!")
                print(
                    f" Original problem (encrypted): {str(example.get('original_problem', ''))[:50]}..."
                )
                print(
                    f" Decrypted problem: {str(example.get('problem', ''))[:50]}..."
                )
                print(
                    f" Decrypted answer: {str(example.get('correct_answer', ''))[:50]}..."
                )
            else:
                print(
                    " Decryption may have failed - no original_problem field"
                )
                print(f" Problem: {str(example.get('problem', ''))[:50]}...")
                print(f" Answer: {str(example.get('answer', ''))[:50]}...")
                # Try manual decryption
                canary = example.get("canary", "")
                if canary:
                    print("\n Attempting manual decryption...")
                    try:
                        problem = example.get("problem", "")
                        answer = example.get("answer", "")
                        decrypted_problem = decrypt(problem, canary)
                        decrypted_answer = decrypt(answer, canary)
                        print(
                            f" Manually decrypted problem: {decrypted_problem[:50]}..."
                        )
                        print(
                            f" Manually decrypted answer: {decrypted_answer[:50]}..."
                        )
                    except Exception as e:
                        # A failed manual attempt is only reported; the loop
                        # continues with the next example.
                        print(f" Manual decryption failed: {e}")
                else:
                    print(" No canary found for manual decryption")
            print()
    except Exception as e:
        # Diagnostic script: report the failure instead of crashing.
        print(f"Error in test: {e}")
def test_simpleqa_loading():
    """Load a few SimpleQA examples and print a short preview of each.

    Requests three examples via ``load_dataset("simpleqa", num_examples=3)``
    and prints each example's ID plus 50-character previews of its
    ``problem`` and ``answer`` fields, as a baseline to compare against the
    BrowseComp output.

    Returns:
        None. All results are printed to stdout.

    Any exception raised while loading or printing is caught and reported
    as ``Error in test: ...`` rather than propagated.
    """
    print("\n=== Testing SimpleQA Loading ===\n")
    try:
        # Load a small number of examples to test
        examples = load_dataset("simpleqa", num_examples=3)
        if not examples:
            print("Error: No examples loaded from dataset")
            return
        print(
            f"Successfully loaded {len(examples)} examples from SimpleQA dataset\n"
        )
        # Check examples
        for i, example in enumerate(examples):
            print(f"Example {i + 1}:")
            print(f" ID: {example.get('id', 'unknown')}")
            # str() guards the slice: a non-string field value (e.g. None or
            # NaN) would otherwise raise TypeError and abort the whole loop.
            print(f" Problem: {str(example.get('problem', ''))[:50]}...")
            print(f" Answer: {str(example.get('answer', ''))[:50]}...")
            print()
    except Exception as e:
        # Diagnostic script: report the failure instead of crashing.
        print(f"Error in test: {e}")
if __name__ == "__main__":
    # Run the BrowseComp check first, then SimpleQA, so the two outputs can
    # be compared side by side.
    for run_check in (test_browsecomp_decryption, test_simpleqa_loading):
        run_check()