mirror of
https://github.com/LearningCircuit/local-deep-research.git
synced 2026-06-16 03:51:07 +03:00
* fix(tests): dispose SQLAlchemy engines in test_database_init.py Replace bare engine/session usage with pytest yield fixtures (db_engine, db_session) so resources are always cleaned up on teardown. Tests that need unique engine setups (basic creation, encrypted, multi-user) dispose inline instead. Supersedes #3216. * fix(tests): move engine disposal to finally block in encrypted test Ensures engines are disposed even if pytest.raises assertion fails, preventing connection pool leaks in the error path.
482 lines
15 KiB
Python
482 lines
15 KiB
Python
"""Tests for database initialization and encryption functionality."""
|
|
|
|
import shutil
|
|
import tempfile
|
|
import uuid
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
from sqlalchemy import create_engine, event, inspect
|
|
from sqlalchemy.exc import OperationalError
|
|
from sqlalchemy.orm import sessionmaker
|
|
|
|
from local_deep_research.database.models import (
|
|
Base,
|
|
ResearchHistory,
|
|
Setting,
|
|
User,
|
|
)
|
|
|
|
|
|
class TestDatabaseInitialization:
    """Test suite for database initialization and setup.

    Each test works against throw-away SQLite files created below a per-test
    temporary directory.  Engines and sessions are released either by fixture
    teardown or by a ``finally`` block, so a failing assertion cannot leave
    pooled connections open.
    """

    @pytest.fixture
    def temp_dir(self):
        """Create a temporary directory for test databases.

        Yields:
            The directory path as a string.  The directory tree is removed
            once the requesting test has finished.
        """
        temp_dir = tempfile.mkdtemp()
        yield temp_dir
        shutil.rmtree(temp_dir)

    @pytest.fixture
    def db_engine(self, temp_dir):
        """Create and dispose a SQLite engine with all tables.

        Yields:
            An engine bound to ``test.db`` inside ``temp_dir`` on which
            ``Base.metadata.create_all`` has already been run.
        """
        db_path = str(Path(temp_dir) / "test.db")
        engine = create_engine(f"sqlite:///{db_path}")
        try:
            # Table creation is inside the try so the engine is disposed
            # even when setup itself fails before the yield is reached.
            Base.metadata.create_all(engine)
            yield engine
        finally:
            engine.dispose()

    @pytest.fixture
    def db_session(self, db_engine):
        """Create a session bound to db_engine, closed on teardown.

        Yields:
            A new session produced by a ``sessionmaker`` bound to
            ``db_engine``.
        """
        session = sessionmaker(bind=db_engine)()
        yield session
        session.close()

    def test_basic_database_creation(self, temp_dir):
        """Test creating a basic SQLite database.

        Checks that the database file appears on disk and that the essential
        tables are present after ``create_all``.
        """
        db_path = str(Path(temp_dir) / "test.db")
        engine = create_engine(f"sqlite:///{db_path}")
        try:
            Base.metadata.create_all(engine)

            # Verify database file exists
            assert Path(db_path).exists()

            # Verify the essential tables were created
            tables = set(inspect(engine).get_table_names())
            expected_tables = {
                "users",
                "research_history",
                "settings",
                "research_resources",
                "token_usage",
                "search_cache",
            }
            missing = expected_tables - tables
            assert not missing, f"Missing tables: {sorted(missing)}"
        finally:
            # Dispose even when an assertion above fails.
            engine.dispose()

    def test_database_creation_with_function(self, temp_dir):
        """Test database creation through standard SQLAlchemy.

        Creates the schema in ``test_user.db`` and runs a trivial query to
        prove the engine can open a working connection.
        """
        from sqlalchemy import text

        db_path = str(Path(temp_dir) / "test_user.db")

        # Create engine and initialize database
        engine = create_engine(f"sqlite:///{db_path}")
        try:
            Base.metadata.create_all(engine)

            # Verify engine is created
            assert engine is not None

            # Verify database exists
            assert Path(db_path).exists()

            # Test connection
            with engine.connect() as conn:
                result = conn.execute(text("SELECT 1"))
                assert result.fetchone()[0] == 1
        finally:
            # Dispose even when an assertion above fails.
            engine.dispose()

    def test_encrypted_database_creation(self, temp_dir):
        """Test creating an encrypted database with SQLCipher.

        The test is skipped when ``pysqlcipher3`` cannot be found or when an
        ``ImportError``/``TypeError`` is raised while setting up the
        encrypted engine.  Otherwise it writes one user through the encrypted
        engine and then expects a query through a plain ``sqlite://`` engine
        on the same file to raise.
        """
        import importlib.util

        # Imported up front: inside the ``pytest.raises`` block below a
        # failing import would be mistaken for the expected error.
        from sqlalchemy import text

        db_path = str(Path(temp_dir) / "encrypted.db")
        password = "test_password_123"

        # Try to create encrypted database (will fail if sqlcipher not available)
        engine = None
        bad_engine = None
        try:
            # Check if pysqlcipher3 is available (probe only, no import)
            if importlib.util.find_spec("pysqlcipher3") is None:
                raise ImportError("pysqlcipher3 not available")

            # SQLCipher URI format
            engine = create_engine(
                f"sqlite+pysqlcipher://:{password}@/{db_path}",
                connect_args={"check_same_thread": False, "timeout": 15},
            )

            @event.listens_for(engine, "connect")
            def do_connect(dbapi_conn, connection_record):
                # Records ``regexp = False`` on the connection record.
                # NOTE(review): the original comment says this disables a
                # problematic regexp function - confirm something reads it.
                connection_record.info["regexp"] = False

            # Create tables
            Base.metadata.create_all(engine)

            # Test writing data; close the session even if the write fails
            session = sessionmaker(bind=engine)()
            try:
                session.add(User(username="testuser"))
                session.commit()
            finally:
                session.close()

            # Verify database is encrypted by trying to open without password
            bad_engine = create_engine(f"sqlite:///{db_path}")
            # NOTE(review): ``Exception`` already subsumes
            # ``OperationalError``; the tuple is kept unchanged because the
            # exact error class raised by the plain driver is not confirmed.
            with pytest.raises((OperationalError, Exception)):
                with bad_engine.connect() as conn:
                    conn.execute(text("SELECT * FROM users"))

        except (ImportError, TypeError) as e:
            # Skip if SQLCipher not available or compatibility issues
            pytest.skip(f"SQLCipher test skipped: {e}")
        finally:
            # Runs on success, failure and skip alike.
            if bad_engine is not None:
                bad_engine.dispose()
            if engine is not None:
                engine.dispose()

    def test_database_schema_completeness(self, db_engine):
        """Test that all expected tables and columns are created.

        For each checked table the expected column names must be a subset of
        the reflected ones; extra columns are allowed.
        """
        inspector = inspect(db_engine)

        expected_by_table = {
            "research_history": {
                "id",
                "query",
                "mode",
                "status",
                "created_at",
                "completed_at",
                "duration_seconds",
                "progress",
                "report_path",
                "report_content",
                "title",
                "progress_log",
                "research_meta",
            },
            "users": {
                "id",
                "username",
                "created_at",
                "last_login",
                "database_version",
            },
            "settings": {
                "id",
                "key",
                "value",
                "type",
                "category",
                "description",
                "name",
                "ui_element",
                "options",
                "min_value",
                "max_value",
                "step",
                "visible",
                "editable",
                "created_at",
                "updated_at",
            },
        }

        for table_name, expected_columns in expected_by_table.items():
            actual_columns = {
                col["name"] for col in inspector.get_columns(table_name)
            }
            # Report which columns are absent instead of a bare failure.
            assert expected_columns.issubset(actual_columns), (
                f"Missing columns in {table_name}: "
                f"{expected_columns - actual_columns}"
            )

    def test_database_indexes(self, db_engine):
        """Test that index reflection on research_history is consistent.

        Asserts that every column named by a reflected index is an actual
        column of the table.
        """
        inspector = inspect(db_engine)

        table_columns = {
            col["name"] for col in inspector.get_columns("research_history")
        }

        # Collect every column that takes part in any index
        index_columns = set()
        for idx in inspector.get_indexes("research_history"):
            # Skip ``None`` entries, which do not name a plain column.
            index_columns.update(
                name for name in idx["column_names"] if name is not None
            )

        assert index_columns.issubset(table_columns), (
            f"Indexes reference unknown columns: "
            f"{index_columns - table_columns}"
        )

        # TODO(review): the original intent was to check that status and
        # created_at are indexed (possibly via composite indexes) and that
        # users has unique constraints; add those assertions once the model
        # definitions have been confirmed.

    def test_database_foreign_keys(self, db_engine, db_session):
        """Test that foreign key relationships work correctly.

        Stores a research record, then a resource and a token-usage row that
        reference its id, and checks the stored references.
        """
        from local_deep_research.database.models import (
            ResearchResource,
            TokenUsage,
        )

        # Create a research record
        research = ResearchHistory(
            id=str(uuid.uuid4()),
            query="Test query",
            mode="quick",
            status="completed",
            created_at="2024-01-01T00:00:00",
        )
        db_session.add(research)
        db_session.commit()

        # Create related records: a resource ...
        resource = ResearchResource(
            research_id=research.id,
            title="Test Resource",
            url="https://example.com",
            created_at="2024-01-01T00:01:00",
        )
        db_session.add(resource)

        # ... and token usage
        usage = TokenUsage(
            research_id=str(research.id),
            model_provider="openai",
            model_name="gpt-4",
            prompt_tokens=80,
            completion_tokens=20,
            total_tokens=100,
        )
        db_session.add(usage)

        db_session.commit()

        # Verify relationships
        assert resource.research_id == research.id
        assert usage.research_id == str(research.id)

    def test_database_cascade_deletes(self, db_engine, db_session):
        """Test cascade delete behavior.

        Creates a benchmark run with three results, deletes the run and
        expects no results to remain for that run id.
        """
        from local_deep_research.database.models import (
            BenchmarkResult,
            BenchmarkRun,
            DatasetType,
        )

        run = BenchmarkRun(
            config_hash="test123",
            query_hash_list=[],
            search_config={},
            evaluation_config={},
            datasets_config={},
        )
        db_session.add(run)
        db_session.commit()

        def count_results():
            # Number of results currently stored for this run
            return (
                db_session.query(BenchmarkResult)
                .filter_by(benchmark_run_id=run.id)
                .count()
            )

        # Add results
        for i in range(3):
            db_session.add(
                BenchmarkResult(
                    benchmark_run_id=run.id,
                    example_id=f"test_{i}",
                    query_hash=f"hash_{i}",
                    dataset_type=DatasetType.SIMPLEQA,
                    question=f"Question {i}",
                    correct_answer=f"Answer {i}",
                )
            )
        db_session.commit()

        # Verify results exist
        assert count_results() == 3

        # Delete the run
        db_session.delete(run)
        db_session.commit()

        # Verify cascade delete worked
        assert count_results() == 0

    def test_database_transactions(self, db_engine, db_session):
        """Test transaction rollback behavior.

        A commit containing a duplicate username and a new setting is
        attempted; after the rollback neither must be visible and the
        originally committed user must still be the only one.
        """
        # Add a user
        db_session.add(User(username="testuser"))
        db_session.commit()

        # Start a transaction that is expected to fail
        try:
            # Another user with the same username (expected to be rejected)
            db_session.add(User(username="testuser"))

            # A setting that must disappear together with the failed commit
            db_session.add(
                Setting(
                    key="test.setting",
                    value="test_value",
                    type="string",
                    category="test",
                )
            )

            db_session.commit()
        except Exception:
            # Kept broad on purpose: the concrete exception class raised by
            # the failing commit is not pinned down by this test.
            db_session.rollback()

        # Verify rollback worked - setting should not exist
        setting_count = (
            db_session.query(Setting).filter_by(key="test.setting").count()
        )
        assert setting_count == 0

        # Original user should still exist
        assert db_session.query(User).count() == 1

    def test_database_performance_with_large_dataset(
        self, db_engine, db_session
    ):
        """Test database performance with larger datasets.

        Inserts 1000 research records (alternating completed/failed) and
        checks that a filtered count and an ordered top-10 query each finish
        in under 0.1 seconds.
        """
        import time

        research_count = 1000
        batch_size = 100

        # Add many research records
        for i in range(research_count):
            is_even = i % 2 == 0
            db_session.add(
                ResearchHistory(
                    id=str(uuid.uuid4()),
                    query=f"Test query {i}",
                    mode="quick",
                    status="completed" if is_even else "failed",
                    created_at=f"2024-01-{(i % 28) + 1:02d}T00:00:00",
                    duration_seconds=100 + i % 500,
                    progress=100 if is_even else 50,
                )
            )

            # Commit once per full batch; ``i % batch_size == 0`` would
            # already commit after the very first record.
            if (i + 1) % batch_size == 0:
                db_session.commit()

        # Flush any remainder when research_count is not a batch multiple
        db_session.commit()

        # Query completed research; perf_counter is monotonic and
        # high-resolution, unlike wall-clock time.time()
        start = time.perf_counter()
        completed = (
            db_session.query(ResearchHistory)
            .filter_by(status="completed")
            .count()
        )
        query_time = time.perf_counter() - start

        assert completed == 500
        assert query_time < 0.1

        # Test ordering
        start = time.perf_counter()
        recent = (
            db_session.query(ResearchHistory)
            .order_by(ResearchHistory.created_at.desc())
            .limit(10)
            .all()
        )
        order_time = time.perf_counter() - start

        assert len(recent) == 10
        assert order_time < 0.1

    def test_user_specific_database_path(self, temp_dir):
        """Test user-specific database paths for multi-user support.

        Two databases are created in separate per-user directories; a record
        written to one must not be visible through the other.
        """
        # Database paths for two different users
        user1_path = str(Path(temp_dir) / "user1" / "user1_encrypted.db")
        user2_path = str(Path(temp_dir) / "user2" / "user2_encrypted.db")

        # Create directories
        Path(user1_path).parent.mkdir(parents=True, exist_ok=True)
        Path(user2_path).parent.mkdir(parents=True, exist_ok=True)

        # Create separate databases
        engine1 = create_engine(f"sqlite:///{user1_path}")
        engine2 = create_engine(f"sqlite:///{user2_path}")
        session1 = None
        session2 = None
        try:
            Base.metadata.create_all(engine1)
            Base.metadata.create_all(engine2)

            session1 = sessionmaker(bind=engine1)()
            session2 = sessionmaker(bind=engine2)()

            # User 1 research
            session1.add(
                ResearchHistory(
                    id=str(uuid.uuid4()),
                    query="User 1 research",
                    mode="quick",
                    status="completed",
                    created_at="2024-01-01T00:00:00",
                )
            )
            session1.commit()

            # User 2 research
            session2.add(
                ResearchHistory(
                    id=str(uuid.uuid4()),
                    query="User 2 research",
                    mode="quick",
                    status="completed",
                    created_at="2024-01-01T00:00:00",
                )
            )
            session2.commit()

            # Verify isolation
            assert session1.query(ResearchHistory).count() == 1
            assert session2.query(ResearchHistory).count() == 1

            # Verify different content
            user1_research = session1.query(ResearchHistory).first()
            user2_research = session2.query(ResearchHistory).first()

            assert user1_research.query == "User 1 research"
            assert user2_research.query == "User 2 research"
        finally:
            # Release sessions first, then the engines that back them, even
            # when an assertion above fails.
            for session in (session1, session2):
                if session is not None:
                    session.close()
            engine1.dispose()
            engine2.dispose()
|