mirror of
https://github.com/LearningCircuit/local-deep-research.git
synced 2026-06-15 19:46:56 +03:00
- Bulk fix import paths from src.local_deep_research to local_deep_research across 190+ test files - Fix test_settings_routes.py: update validate_setting tests to use proper Setting objects instead of incorrect parameter signature - Fix test_research_service.py: remove tests for non-existent functions, correct mimetype assertions for latex export - Fix test_app_factory.py: use truly public IP (8.8.8.8) instead of TEST-NET reserved range for IP validation test - Fix test_api_functions.py: correct patch paths for get_user_db_session, has_request_context, and get_settings_manager - Fix test_news_api.py: correct patch paths for database session Test results: 8592 passing, 175 failing (down from 286), 115 skipped Remaining failures are test isolation issues that pass individually.
166 lines
5.7 KiB
Python
166 lines
5.7 KiB
Python
"""
|
|
Test for Wikipedia URL validation security fix.
|
|
|
|
This addresses the security issue mentioned in PR #361 review.
|
|
"""
|
|
|
|
from tests.fixtures.search_engine_mocks import validate_wikipedia_url
|
|
|
|
|
|
class TestWikipediaURLSecurity:
|
|
"""Test Wikipedia URL validation for security."""
|
|
|
|
def test_valid_wikipedia_urls(self):
|
|
"""Test that valid Wikipedia URLs are accepted."""
|
|
valid_urls = [
|
|
"https://en.wikipedia.org/wiki/Python",
|
|
"https://wikipedia.org/wiki/Main_Page",
|
|
"https://fr.wikipedia.org/wiki/Accueil",
|
|
"https://de.wikipedia.org/wiki/Deutschland",
|
|
"https://es.wikipedia.org/wiki/España",
|
|
"https://ja.wikipedia.org/wiki/日本",
|
|
"https://zh.wikipedia.org/wiki/中国",
|
|
"https://simple.wikipedia.org/wiki/Computer",
|
|
"https://meta.wikipedia.org/wiki/Main_Page",
|
|
]
|
|
|
|
for url in valid_urls:
|
|
assert validate_wikipedia_url(url) is True, (
|
|
f"Valid URL rejected: {url}"
|
|
)
|
|
|
|
def test_invalid_wikipedia_urls(self):
|
|
"""Test that invalid/malicious URLs are rejected."""
|
|
invalid_urls = [
|
|
# Non-Wikipedia domains
|
|
"https://fakepedia.org/wiki/Test",
|
|
"https://wikipedia.com/wiki/Test", # .com not .org
|
|
"https://wikipediaorg.com/wiki/Test",
|
|
"https://en-wikipedia.org/wiki/Test", # Hyphenated domain
|
|
# Malicious attempts
|
|
"https://wikipedia.org.evil.com/wiki/Test",
|
|
"https://evil.com/wikipedia.org/wiki/Test",
|
|
"https://wikipedia.org@evil.com/wiki/Test",
|
|
"https://wikipedia.org.evil.com",
|
|
# Invalid URLs
|
|
"not-a-url",
|
|
"ftp://wikipedia.org/wiki/Test",
|
|
"javascript:alert('xss')",
|
|
"file:///etc/passwd",
|
|
"",
|
|
None,
|
|
# Edge cases
|
|
"https://wikipediaXorg/wiki/Test",
|
|
"https://wikipedia_org/wiki/Test",
|
|
"https://wikipedia..org/wiki/Test",
|
|
]
|
|
|
|
for url in invalid_urls:
|
|
assert validate_wikipedia_url(url) is False, (
|
|
f"Invalid URL accepted: {url}"
|
|
)
|
|
|
|
def test_url_validation_handles_exceptions(self):
|
|
"""Test that URL validation handles exceptions gracefully."""
|
|
# Test with various problematic inputs
|
|
problematic_inputs = [
|
|
123, # Integer
|
|
[], # List
|
|
{}, # Dict
|
|
object(), # Generic object
|
|
float("inf"), # Infinity
|
|
float("nan"), # NaN
|
|
]
|
|
|
|
for input_val in problematic_inputs:
|
|
# Should return False without raising exception
|
|
assert validate_wikipedia_url(input_val) is False
|
|
|
|
def test_wikipedia_search_uses_validation(self, monkeypatch):
|
|
"""Test that Wikipedia search engine uses URL validation."""
|
|
from tests.test_utils import add_src_to_path
|
|
|
|
add_src_to_path()
|
|
|
|
# Import after adding src to path
|
|
from local_deep_research.web_search_engines.engines.search_engine_wikipedia import (
|
|
WikipediaSearchEngine,
|
|
)
|
|
|
|
# The WikipediaSearchEngine should construct safe URLs internally
|
|
# It should not use user-provided URLs directly
|
|
search = WikipediaSearchEngine(max_results=1)
|
|
|
|
# Mock the wikipedia module's search method
|
|
import wikipedia
|
|
|
|
def mock_search(*args, **kwargs):
|
|
return ["Test Article"]
|
|
|
|
monkeypatch.setattr(wikipedia, "search", mock_search)
|
|
|
|
# Mock wikipedia.summary to avoid actual API calls
|
|
def mock_summary(title, *args, **kwargs):
|
|
return "Test summary for the article."
|
|
|
|
monkeypatch.setattr(wikipedia, "summary", mock_summary)
|
|
|
|
# Get previews (this is where URLs are constructed)
|
|
previews = search._get_previews("test query")
|
|
|
|
# Verify that the URL is properly constructed
|
|
if previews:
|
|
for preview in previews:
|
|
url = preview.get("link", "")
|
|
# URL should be a valid Wikipedia URL
|
|
assert validate_wikipedia_url(url), (
|
|
f"Invalid URL generated: {url}"
|
|
)
|
|
# URL should follow the expected pattern
|
|
assert url.startswith("https://en.wikipedia.org/wiki/"), (
|
|
f"Unexpected URL format: {url}"
|
|
)
|
|
|
|
|
|
def test_wikipedia_url_in_response_validation():
|
|
"""Test that we validate URLs in Wikipedia API responses."""
|
|
# This is a conceptual test showing how URL validation should be used
|
|
# when processing Wikipedia API responses
|
|
|
|
def process_wikipedia_response(response_data):
|
|
"""Process Wikipedia response with URL validation."""
|
|
results = []
|
|
|
|
for item in response_data.get("results", []):
|
|
# If the response contains URLs, validate them
|
|
if "url" in item:
|
|
if not validate_wikipedia_url(item["url"]):
|
|
# Skip items with invalid URLs
|
|
continue
|
|
|
|
results.append(item)
|
|
|
|
return results
|
|
|
|
# Test with mixed valid/invalid URLs
|
|
test_response = {
|
|
"results": [
|
|
{
|
|
"title": "Valid Article",
|
|
"url": "https://en.wikipedia.org/wiki/Valid",
|
|
},
|
|
{
|
|
"title": "Invalid Article",
|
|
"url": "https://fake-wikipedia.org/wiki/Invalid",
|
|
},
|
|
{"title": "No URL Article"}, # No URL field
|
|
]
|
|
}
|
|
|
|
processed = process_wikipedia_response(test_response)
|
|
|
|
# Should only include valid items
|
|
assert len(processed) == 2
|
|
assert processed[0]["title"] == "Valid Article"
|
|
assert processed[1]["title"] == "No URL Article"
|