Files
local-deep-research/tests/test_citation_handler.py
LearningCircuit d9bc003094 fix: resolve test failures and fix import paths in test suite
- Bulk fix import paths from src.local_deep_research to local_deep_research
  across 190+ test files
- Fix test_settings_routes.py: update validate_setting tests to use proper
  Setting objects instead of incorrect parameter signature
- Fix test_research_service.py: remove tests for non-existent functions,
  correct mimetype assertions for latex export
- Fix test_app_factory.py: use truly public IP (8.8.8.8) instead of
  TEST-NET reserved range for IP validation test
- Fix test_api_functions.py: correct patch paths for get_user_db_session,
  has_request_context, and get_settings_manager
- Fix test_news_api.py: correct patch paths for database session

Test results: 8592 passing, 175 failing (down from 286), 115 skipped
Remaining failures are test isolation issues that pass individually.
2026-01-21 20:50:48 +01:00

372 lines
14 KiB
Python

import sys
from pathlib import Path
from unittest.mock import Mock
import pytest
# Handle import paths for testing
sys.path.append(str(Path(__file__).parent.parent))
from langchain_core.documents import Document
# Now import the CitationHandler - the mocks will be set up by pytest_configure in conftest.py
from local_deep_research.citation_handler import (
CitationHandler,
)
@pytest.fixture
def citation_handler():
"""Create a citation handler with a mocked LLM for testing."""
mock_llm = Mock()
mock_llm.invoke.return_value = Mock(
content="Mocked analysis with citation [1]"
)
return CitationHandler(mock_llm)
@pytest.fixture
def sample_search_results():
"""Sample search results for testing."""
return [
{
"title": "Test Result 1",
"link": "https://example.com/1",
"snippet": "This is the first test result snippet.",
},
{
"title": "Test Result 2",
"link": "https://example.com/2",
"full_content": "This is the full content of the second test result.",
},
]
def test_create_documents_empty(citation_handler):
"""Test document creation with empty search results."""
documents = citation_handler._create_documents([])
assert len(documents) == 0
def test_create_documents_string(citation_handler):
"""Test document creation with string input (error case)."""
documents = citation_handler._create_documents("not a list")
assert len(documents) == 0
def test_create_documents(citation_handler, sample_search_results):
"""Test document creation with valid search results."""
documents = citation_handler._create_documents(sample_search_results)
# Check if the correct number of documents was created
assert len(documents) == 2
# Check first document
assert documents[0].metadata["title"] == "Test Result 1"
assert documents[0].metadata["source"] == "https://example.com/1"
assert documents[0].metadata["index"] == 1
assert documents[0].page_content == "This is the first test result snippet."
# Check second document - should use full_content instead of snippet
assert documents[1].metadata["title"] == "Test Result 2"
assert documents[1].metadata["source"] == "https://example.com/2"
assert documents[1].metadata["index"] == 2
assert (
documents[1].page_content
== "This is the full content of the second test result."
)
def test_create_documents_with_offset(citation_handler, sample_search_results):
"""Test document creation with non-zero starting index."""
documents = citation_handler._create_documents(
sample_search_results, nr_of_links=3
)
# Check if indexes were correctly offset
assert documents[0].metadata["index"] == 4 # 3+1
assert documents[1].metadata["index"] == 5 # 3+2
def test_format_sources(citation_handler):
"""Test formatting document sources for citation."""
docs = [
Document(
page_content="Content 1",
metadata={"source": "src1", "title": "Title 1", "index": 1},
),
Document(
page_content="Content 2",
metadata={"source": "src2", "title": "Title 2", "index": 2},
),
]
formatted = citation_handler._format_sources(docs)
# Check if sources are correctly formatted with citation numbers
assert "[1] Content 1" in formatted
assert "[2] Content 2" in formatted
# Check the order of sources
assert formatted.index("[1]") < formatted.index("[2]")
def test_analyze_initial(citation_handler, sample_search_results):
"""Test initial analysis of search results."""
result = citation_handler.analyze_initial(
"test query", sample_search_results
)
# Check if LLM was called with the correct prompt
citation_handler.llm.invoke.assert_called_once()
prompt_used = citation_handler.llm.invoke.call_args[0][0]
# Check if prompt contains expected elements
assert "test query" in prompt_used
assert "Sources:" in prompt_used
assert "[1]" in prompt_used
assert "[2]" in prompt_used
# Check returned data structure
assert "content" in result
assert "documents" in result
assert result["content"] == "Mocked analysis with citation [1]"
assert len(result["documents"]) == 2
def test_analyze_followup(citation_handler, sample_search_results, monkeypatch):
"""Test follow-up analysis with previous knowledge."""
# Set fact checking enabled through settings snapshot
citation_handler.settings_snapshot = {
"general.enable_fact_checking": {"value": True, "type": "bool"}
}
# Update the handler's settings as well
citation_handler._handler.settings_snapshot = (
citation_handler.settings_snapshot
)
result = citation_handler.analyze_followup(
"follow-up question",
sample_search_results,
"Previous knowledge text",
nr_of_links=2,
)
# LLM should be called twice (fact check + analysis)
assert citation_handler.llm.invoke.call_count == 2
# Check prompt contains fact checking and previous knowledge
analysis_prompt = citation_handler.llm.invoke.call_args[0][0]
assert "Previous Knowledge:" in analysis_prompt
assert "follow-up question" in analysis_prompt
assert "[3]" in analysis_prompt # Should use offset for citations
# Check returned data structure
assert "content" in result
assert "documents" in result
assert len(result["documents"]) == 2
# Check that indexes are correctly offset
assert result["documents"][0].metadata["index"] == 3
def test_analyze_followup_no_fact_check(
citation_handler, sample_search_results, monkeypatch
):
"""Test follow-up analysis with fact checking disabled."""
# Set fact checking disabled through settings snapshot
citation_handler.settings_snapshot = {
"general.enable_fact_checking": {"value": False, "type": "bool"}
}
# Update the handler's settings as well
citation_handler._handler.settings_snapshot = (
citation_handler.settings_snapshot
)
citation_handler.analyze_followup(
"follow-up question",
sample_search_results,
"Previous knowledge text",
nr_of_links=0,
)
# LLM should only be called once (no fact check)
assert citation_handler.llm.invoke.call_count == 1
class TestCitationHandlerType:
"""Tests for citation handler type selection."""
def test_default_handler_is_standard(self):
"""Should use StandardCitationHandler by default."""
mock_llm = Mock()
handler = CitationHandler(mock_llm)
assert "StandardCitationHandler" in type(handler._handler).__name__
def test_explicit_standard_handler(self):
"""Should use StandardCitationHandler when explicitly specified."""
mock_llm = Mock()
handler = CitationHandler(mock_llm, handler_type="standard")
assert "StandardCitationHandler" in type(handler._handler).__name__
def test_forced_handler(self):
"""Should use ForcedAnswerCitationHandler for 'forced' type."""
mock_llm = Mock()
handler = CitationHandler(mock_llm, handler_type="forced")
assert "ForcedAnswerCitationHandler" in type(handler._handler).__name__
def test_forced_answer_handler(self):
"""Should use ForcedAnswerCitationHandler for 'forced_answer' type."""
mock_llm = Mock()
handler = CitationHandler(mock_llm, handler_type="forced_answer")
assert "ForcedAnswerCitationHandler" in type(handler._handler).__name__
def test_browsecomp_handler(self):
"""Should use ForcedAnswerCitationHandler for 'browsecomp' type."""
mock_llm = Mock()
handler = CitationHandler(mock_llm, handler_type="browsecomp")
assert "ForcedAnswerCitationHandler" in type(handler._handler).__name__
def test_precision_handler(self):
"""Should use PrecisionExtractionHandler for 'precision' type."""
mock_llm = Mock()
handler = CitationHandler(mock_llm, handler_type="precision")
assert "PrecisionExtractionHandler" in type(handler._handler).__name__
def test_precision_extraction_handler(self):
"""Should use PrecisionExtractionHandler for 'precision_extraction' type."""
mock_llm = Mock()
handler = CitationHandler(mock_llm, handler_type="precision_extraction")
assert "PrecisionExtractionHandler" in type(handler._handler).__name__
def test_simpleqa_handler(self):
"""Should use PrecisionExtractionHandler for 'simpleqa' type."""
mock_llm = Mock()
handler = CitationHandler(mock_llm, handler_type="simpleqa")
assert "PrecisionExtractionHandler" in type(handler._handler).__name__
def test_unknown_handler_falls_back_to_standard(self):
"""Should fall back to StandardCitationHandler for unknown type."""
mock_llm = Mock()
handler = CitationHandler(mock_llm, handler_type="unknown_type")
assert "StandardCitationHandler" in type(handler._handler).__name__
def test_handler_type_from_settings_snapshot(self):
"""Should read handler type from settings snapshot."""
mock_llm = Mock()
settings = {"citation.handler_type": "forced"}
handler = CitationHandler(mock_llm, settings_snapshot=settings)
assert "ForcedAnswerCitationHandler" in type(handler._handler).__name__
def test_handler_type_from_settings_snapshot_dict_format(self):
"""Should read handler type from settings snapshot with value dict."""
mock_llm = Mock()
settings = {
"citation.handler_type": {"value": "precision", "type": "str"}
}
handler = CitationHandler(mock_llm, settings_snapshot=settings)
assert "PrecisionExtractionHandler" in type(handler._handler).__name__
def test_handler_type_case_insensitive(self):
"""Handler type should be case insensitive."""
mock_llm = Mock()
handler = CitationHandler(mock_llm, handler_type="FORCED")
assert "ForcedAnswerCitationHandler" in type(handler._handler).__name__
def test_settings_snapshot_passed_to_handler(self):
"""Settings snapshot should be passed to the underlying handler."""
mock_llm = Mock()
settings = {"custom_key": "custom_value"}
handler = CitationHandler(mock_llm, settings_snapshot=settings)
assert handler.settings_snapshot == settings
assert handler._handler.settings_snapshot == settings
class TestCitationHandlerDelegation:
"""Tests for CitationHandler delegation to internal handler."""
def test_exposes_create_documents_method(self):
"""Should expose _create_documents from internal handler."""
mock_llm = Mock()
handler = CitationHandler(mock_llm)
assert callable(handler._create_documents)
# Call it to verify it works
result = handler._create_documents([])
assert isinstance(result, list)
def test_exposes_format_sources_method(self):
"""Should expose _format_sources from internal handler."""
mock_llm = Mock()
handler = CitationHandler(mock_llm)
assert callable(handler._format_sources)
def test_analyze_initial_delegates(self):
"""Should delegate analyze_initial to internal handler."""
mock_llm = Mock()
mock_llm.invoke.return_value = Mock(content="Test response")
handler = CitationHandler(mock_llm)
result = handler.analyze_initial("query", [])
assert "content" in result
def test_analyze_followup_delegates(self):
"""Should delegate analyze_followup to internal handler."""
mock_llm = Mock()
mock_llm.invoke.return_value = Mock(content="Test response")
handler = CitationHandler(mock_llm)
result = handler.analyze_followup("question", [], "previous", 0)
assert "content" in result
class TestCitationHandlerEdgeCases:
"""Tests for edge cases in CitationHandler."""
def test_none_settings_snapshot_uses_empty_dict(self):
"""Should use empty dict when settings_snapshot is None."""
mock_llm = Mock()
handler = CitationHandler(mock_llm, settings_snapshot=None)
assert handler.settings_snapshot == {}
def test_empty_settings_snapshot(self):
"""Should work with empty settings snapshot."""
mock_llm = Mock()
handler = CitationHandler(mock_llm, settings_snapshot={})
assert handler.settings_snapshot == {}
# Default handler type should be standard
assert "StandardCitationHandler" in type(handler._handler).__name__
def test_llm_stored_on_handler(self):
"""Should store LLM reference on handler."""
mock_llm = Mock()
handler = CitationHandler(mock_llm)
assert handler.llm is mock_llm
def test_create_documents_with_missing_fields(self):
"""Should handle search results with missing fields."""
mock_llm = Mock()
handler = CitationHandler(mock_llm)
# Result with missing title
results = [{"link": "https://example.com", "snippet": "content"}]
docs = handler._create_documents(results)
assert len(docs) == 1
# Default title is "Source N" when missing
assert docs[0].metadata["title"] == "Source 1"
def test_create_documents_with_none_content(self):
"""Should handle search results with None content."""
mock_llm = Mock()
handler = CitationHandler(mock_llm)
results = [{"title": "Title", "link": "https://example.com"}]
docs = handler._create_documents(results)
assert len(docs) == 1
# Should have empty or default content
assert docs[0].page_content is not None
def test_analyze_initial_with_string_results(self):
"""Should handle string input gracefully."""
mock_llm = Mock()
mock_llm.invoke.return_value = Mock(content="Response")
handler = CitationHandler(mock_llm)
result = handler.analyze_initial("query", "not a list")
assert "content" in result
assert "documents" in result