mirror of
https://github.com/LearningCircuit/local-deep-research.git
synced 2026-06-16 03:51:07 +03:00
- Bulk fix import paths from src.local_deep_research to local_deep_research across 190+ test files - Fix test_settings_routes.py: update validate_setting tests to use proper Setting objects instead of incorrect parameter signature - Fix test_research_service.py: remove tests for non-existent functions, correct mimetype assertions for latex export - Fix test_app_factory.py: use truly public IP (8.8.8.8) instead of TEST-NET reserved range for IP validation test - Fix test_api_functions.py: correct patch paths for get_user_db_session, has_request_context, and get_settings_manager - Fix test_news_api.py: correct patch paths for database session Test results: 8592 passing, 175 failing (down from 286), 115 skipped Remaining failures are test isolation issues that pass individually.
372 lines
14 KiB
Python
372 lines
14 KiB
Python
import sys
|
|
from pathlib import Path
|
|
from unittest.mock import Mock
|
|
|
|
import pytest
|
|
|
|
# Handle import paths for testing
|
|
sys.path.append(str(Path(__file__).parent.parent))
|
|
|
|
from langchain_core.documents import Document
|
|
|
|
# Now import the CitationHandler - the mocks will be set up by pytest_configure in conftest.py
|
|
from local_deep_research.citation_handler import (
|
|
CitationHandler,
|
|
)
|
|
|
|
|
|
@pytest.fixture
|
|
def citation_handler():
|
|
"""Create a citation handler with a mocked LLM for testing."""
|
|
mock_llm = Mock()
|
|
mock_llm.invoke.return_value = Mock(
|
|
content="Mocked analysis with citation [1]"
|
|
)
|
|
return CitationHandler(mock_llm)
|
|
|
|
|
|
@pytest.fixture
|
|
def sample_search_results():
|
|
"""Sample search results for testing."""
|
|
return [
|
|
{
|
|
"title": "Test Result 1",
|
|
"link": "https://example.com/1",
|
|
"snippet": "This is the first test result snippet.",
|
|
},
|
|
{
|
|
"title": "Test Result 2",
|
|
"link": "https://example.com/2",
|
|
"full_content": "This is the full content of the second test result.",
|
|
},
|
|
]
|
|
|
|
|
|
def test_create_documents_empty(citation_handler):
|
|
"""Test document creation with empty search results."""
|
|
documents = citation_handler._create_documents([])
|
|
assert len(documents) == 0
|
|
|
|
|
|
def test_create_documents_string(citation_handler):
|
|
"""Test document creation with string input (error case)."""
|
|
documents = citation_handler._create_documents("not a list")
|
|
assert len(documents) == 0
|
|
|
|
|
|
def test_create_documents(citation_handler, sample_search_results):
|
|
"""Test document creation with valid search results."""
|
|
documents = citation_handler._create_documents(sample_search_results)
|
|
|
|
# Check if the correct number of documents was created
|
|
assert len(documents) == 2
|
|
|
|
# Check first document
|
|
assert documents[0].metadata["title"] == "Test Result 1"
|
|
assert documents[0].metadata["source"] == "https://example.com/1"
|
|
assert documents[0].metadata["index"] == 1
|
|
assert documents[0].page_content == "This is the first test result snippet."
|
|
|
|
# Check second document - should use full_content instead of snippet
|
|
assert documents[1].metadata["title"] == "Test Result 2"
|
|
assert documents[1].metadata["source"] == "https://example.com/2"
|
|
assert documents[1].metadata["index"] == 2
|
|
assert (
|
|
documents[1].page_content
|
|
== "This is the full content of the second test result."
|
|
)
|
|
|
|
|
|
def test_create_documents_with_offset(citation_handler, sample_search_results):
|
|
"""Test document creation with non-zero starting index."""
|
|
documents = citation_handler._create_documents(
|
|
sample_search_results, nr_of_links=3
|
|
)
|
|
|
|
# Check if indexes were correctly offset
|
|
assert documents[0].metadata["index"] == 4 # 3+1
|
|
assert documents[1].metadata["index"] == 5 # 3+2
|
|
|
|
|
|
def test_format_sources(citation_handler):
|
|
"""Test formatting document sources for citation."""
|
|
docs = [
|
|
Document(
|
|
page_content="Content 1",
|
|
metadata={"source": "src1", "title": "Title 1", "index": 1},
|
|
),
|
|
Document(
|
|
page_content="Content 2",
|
|
metadata={"source": "src2", "title": "Title 2", "index": 2},
|
|
),
|
|
]
|
|
|
|
formatted = citation_handler._format_sources(docs)
|
|
|
|
# Check if sources are correctly formatted with citation numbers
|
|
assert "[1] Content 1" in formatted
|
|
assert "[2] Content 2" in formatted
|
|
# Check the order of sources
|
|
assert formatted.index("[1]") < formatted.index("[2]")
|
|
|
|
|
|
def test_analyze_initial(citation_handler, sample_search_results):
|
|
"""Test initial analysis of search results."""
|
|
result = citation_handler.analyze_initial(
|
|
"test query", sample_search_results
|
|
)
|
|
|
|
# Check if LLM was called with the correct prompt
|
|
citation_handler.llm.invoke.assert_called_once()
|
|
prompt_used = citation_handler.llm.invoke.call_args[0][0]
|
|
|
|
# Check if prompt contains expected elements
|
|
assert "test query" in prompt_used
|
|
assert "Sources:" in prompt_used
|
|
assert "[1]" in prompt_used
|
|
assert "[2]" in prompt_used
|
|
|
|
# Check returned data structure
|
|
assert "content" in result
|
|
assert "documents" in result
|
|
assert result["content"] == "Mocked analysis with citation [1]"
|
|
assert len(result["documents"]) == 2
|
|
|
|
|
|
def test_analyze_followup(citation_handler, sample_search_results, monkeypatch):
|
|
"""Test follow-up analysis with previous knowledge."""
|
|
|
|
# Set fact checking enabled through settings snapshot
|
|
citation_handler.settings_snapshot = {
|
|
"general.enable_fact_checking": {"value": True, "type": "bool"}
|
|
}
|
|
# Update the handler's settings as well
|
|
citation_handler._handler.settings_snapshot = (
|
|
citation_handler.settings_snapshot
|
|
)
|
|
|
|
result = citation_handler.analyze_followup(
|
|
"follow-up question",
|
|
sample_search_results,
|
|
"Previous knowledge text",
|
|
nr_of_links=2,
|
|
)
|
|
|
|
# LLM should be called twice (fact check + analysis)
|
|
assert citation_handler.llm.invoke.call_count == 2
|
|
|
|
# Check prompt contains fact checking and previous knowledge
|
|
analysis_prompt = citation_handler.llm.invoke.call_args[0][0]
|
|
assert "Previous Knowledge:" in analysis_prompt
|
|
assert "follow-up question" in analysis_prompt
|
|
assert "[3]" in analysis_prompt # Should use offset for citations
|
|
|
|
# Check returned data structure
|
|
assert "content" in result
|
|
assert "documents" in result
|
|
assert len(result["documents"]) == 2
|
|
# Check that indexes are correctly offset
|
|
assert result["documents"][0].metadata["index"] == 3
|
|
|
|
|
|
def test_analyze_followup_no_fact_check(
|
|
citation_handler, sample_search_results, monkeypatch
|
|
):
|
|
"""Test follow-up analysis with fact checking disabled."""
|
|
|
|
# Set fact checking disabled through settings snapshot
|
|
citation_handler.settings_snapshot = {
|
|
"general.enable_fact_checking": {"value": False, "type": "bool"}
|
|
}
|
|
# Update the handler's settings as well
|
|
citation_handler._handler.settings_snapshot = (
|
|
citation_handler.settings_snapshot
|
|
)
|
|
|
|
citation_handler.analyze_followup(
|
|
"follow-up question",
|
|
sample_search_results,
|
|
"Previous knowledge text",
|
|
nr_of_links=0,
|
|
)
|
|
|
|
# LLM should only be called once (no fact check)
|
|
assert citation_handler.llm.invoke.call_count == 1
|
|
|
|
|
|
class TestCitationHandlerType:
|
|
"""Tests for citation handler type selection."""
|
|
|
|
def test_default_handler_is_standard(self):
|
|
"""Should use StandardCitationHandler by default."""
|
|
mock_llm = Mock()
|
|
handler = CitationHandler(mock_llm)
|
|
assert "StandardCitationHandler" in type(handler._handler).__name__
|
|
|
|
def test_explicit_standard_handler(self):
|
|
"""Should use StandardCitationHandler when explicitly specified."""
|
|
mock_llm = Mock()
|
|
handler = CitationHandler(mock_llm, handler_type="standard")
|
|
assert "StandardCitationHandler" in type(handler._handler).__name__
|
|
|
|
def test_forced_handler(self):
|
|
"""Should use ForcedAnswerCitationHandler for 'forced' type."""
|
|
mock_llm = Mock()
|
|
handler = CitationHandler(mock_llm, handler_type="forced")
|
|
assert "ForcedAnswerCitationHandler" in type(handler._handler).__name__
|
|
|
|
def test_forced_answer_handler(self):
|
|
"""Should use ForcedAnswerCitationHandler for 'forced_answer' type."""
|
|
mock_llm = Mock()
|
|
handler = CitationHandler(mock_llm, handler_type="forced_answer")
|
|
assert "ForcedAnswerCitationHandler" in type(handler._handler).__name__
|
|
|
|
def test_browsecomp_handler(self):
|
|
"""Should use ForcedAnswerCitationHandler for 'browsecomp' type."""
|
|
mock_llm = Mock()
|
|
handler = CitationHandler(mock_llm, handler_type="browsecomp")
|
|
assert "ForcedAnswerCitationHandler" in type(handler._handler).__name__
|
|
|
|
def test_precision_handler(self):
|
|
"""Should use PrecisionExtractionHandler for 'precision' type."""
|
|
mock_llm = Mock()
|
|
handler = CitationHandler(mock_llm, handler_type="precision")
|
|
assert "PrecisionExtractionHandler" in type(handler._handler).__name__
|
|
|
|
def test_precision_extraction_handler(self):
|
|
"""Should use PrecisionExtractionHandler for 'precision_extraction' type."""
|
|
mock_llm = Mock()
|
|
handler = CitationHandler(mock_llm, handler_type="precision_extraction")
|
|
assert "PrecisionExtractionHandler" in type(handler._handler).__name__
|
|
|
|
def test_simpleqa_handler(self):
|
|
"""Should use PrecisionExtractionHandler for 'simpleqa' type."""
|
|
mock_llm = Mock()
|
|
handler = CitationHandler(mock_llm, handler_type="simpleqa")
|
|
assert "PrecisionExtractionHandler" in type(handler._handler).__name__
|
|
|
|
def test_unknown_handler_falls_back_to_standard(self):
|
|
"""Should fall back to StandardCitationHandler for unknown type."""
|
|
mock_llm = Mock()
|
|
handler = CitationHandler(mock_llm, handler_type="unknown_type")
|
|
assert "StandardCitationHandler" in type(handler._handler).__name__
|
|
|
|
def test_handler_type_from_settings_snapshot(self):
|
|
"""Should read handler type from settings snapshot."""
|
|
mock_llm = Mock()
|
|
settings = {"citation.handler_type": "forced"}
|
|
handler = CitationHandler(mock_llm, settings_snapshot=settings)
|
|
assert "ForcedAnswerCitationHandler" in type(handler._handler).__name__
|
|
|
|
def test_handler_type_from_settings_snapshot_dict_format(self):
|
|
"""Should read handler type from settings snapshot with value dict."""
|
|
mock_llm = Mock()
|
|
settings = {
|
|
"citation.handler_type": {"value": "precision", "type": "str"}
|
|
}
|
|
handler = CitationHandler(mock_llm, settings_snapshot=settings)
|
|
assert "PrecisionExtractionHandler" in type(handler._handler).__name__
|
|
|
|
def test_handler_type_case_insensitive(self):
|
|
"""Handler type should be case insensitive."""
|
|
mock_llm = Mock()
|
|
handler = CitationHandler(mock_llm, handler_type="FORCED")
|
|
assert "ForcedAnswerCitationHandler" in type(handler._handler).__name__
|
|
|
|
def test_settings_snapshot_passed_to_handler(self):
|
|
"""Settings snapshot should be passed to the underlying handler."""
|
|
mock_llm = Mock()
|
|
settings = {"custom_key": "custom_value"}
|
|
handler = CitationHandler(mock_llm, settings_snapshot=settings)
|
|
assert handler.settings_snapshot == settings
|
|
assert handler._handler.settings_snapshot == settings
|
|
|
|
|
|
class TestCitationHandlerDelegation:
|
|
"""Tests for CitationHandler delegation to internal handler."""
|
|
|
|
def test_exposes_create_documents_method(self):
|
|
"""Should expose _create_documents from internal handler."""
|
|
mock_llm = Mock()
|
|
handler = CitationHandler(mock_llm)
|
|
assert callable(handler._create_documents)
|
|
# Call it to verify it works
|
|
result = handler._create_documents([])
|
|
assert isinstance(result, list)
|
|
|
|
def test_exposes_format_sources_method(self):
|
|
"""Should expose _format_sources from internal handler."""
|
|
mock_llm = Mock()
|
|
handler = CitationHandler(mock_llm)
|
|
assert callable(handler._format_sources)
|
|
|
|
def test_analyze_initial_delegates(self):
|
|
"""Should delegate analyze_initial to internal handler."""
|
|
mock_llm = Mock()
|
|
mock_llm.invoke.return_value = Mock(content="Test response")
|
|
handler = CitationHandler(mock_llm)
|
|
result = handler.analyze_initial("query", [])
|
|
assert "content" in result
|
|
|
|
def test_analyze_followup_delegates(self):
|
|
"""Should delegate analyze_followup to internal handler."""
|
|
mock_llm = Mock()
|
|
mock_llm.invoke.return_value = Mock(content="Test response")
|
|
handler = CitationHandler(mock_llm)
|
|
result = handler.analyze_followup("question", [], "previous", 0)
|
|
assert "content" in result
|
|
|
|
|
|
class TestCitationHandlerEdgeCases:
|
|
"""Tests for edge cases in CitationHandler."""
|
|
|
|
def test_none_settings_snapshot_uses_empty_dict(self):
|
|
"""Should use empty dict when settings_snapshot is None."""
|
|
mock_llm = Mock()
|
|
handler = CitationHandler(mock_llm, settings_snapshot=None)
|
|
assert handler.settings_snapshot == {}
|
|
|
|
def test_empty_settings_snapshot(self):
|
|
"""Should work with empty settings snapshot."""
|
|
mock_llm = Mock()
|
|
handler = CitationHandler(mock_llm, settings_snapshot={})
|
|
assert handler.settings_snapshot == {}
|
|
# Default handler type should be standard
|
|
assert "StandardCitationHandler" in type(handler._handler).__name__
|
|
|
|
def test_llm_stored_on_handler(self):
|
|
"""Should store LLM reference on handler."""
|
|
mock_llm = Mock()
|
|
handler = CitationHandler(mock_llm)
|
|
assert handler.llm is mock_llm
|
|
|
|
def test_create_documents_with_missing_fields(self):
|
|
"""Should handle search results with missing fields."""
|
|
mock_llm = Mock()
|
|
handler = CitationHandler(mock_llm)
|
|
# Result with missing title
|
|
results = [{"link": "https://example.com", "snippet": "content"}]
|
|
docs = handler._create_documents(results)
|
|
assert len(docs) == 1
|
|
# Default title is "Source N" when missing
|
|
assert docs[0].metadata["title"] == "Source 1"
|
|
|
|
def test_create_documents_with_none_content(self):
|
|
"""Should handle search results with None content."""
|
|
mock_llm = Mock()
|
|
handler = CitationHandler(mock_llm)
|
|
results = [{"title": "Title", "link": "https://example.com"}]
|
|
docs = handler._create_documents(results)
|
|
assert len(docs) == 1
|
|
# Should have empty or default content
|
|
assert docs[0].page_content is not None
|
|
|
|
def test_analyze_initial_with_string_results(self):
|
|
"""Should handle string input gracefully."""
|
|
mock_llm = Mock()
|
|
mock_llm.invoke.return_value = Mock(content="Response")
|
|
handler = CitationHandler(mock_llm)
|
|
result = handler.analyze_initial("query", "not a list")
|
|
assert "content" in result
|
|
assert "documents" in result
|