Files
local-deep-research/tests/database/test_database_init.py
LearningCircuit 0e5b26cdb7 fix(tests): dispose SQLAlchemy engines in test_database_init.py (#3242)
* fix(tests): dispose SQLAlchemy engines in test_database_init.py

Replace bare engine/session usage with pytest yield fixtures (db_engine,
db_session) so resources are always cleaned up on teardown. Tests that
need unique engine setups (basic creation, encrypted, multi-user) dispose
inline instead. Supersedes #3216.

* fix(tests): move engine disposal to finally block in encrypted test

Ensures engines are disposed even if pytest.raises assertion fails,
preventing connection pool leaks in the error path.
2026-03-28 00:41:40 +01:00

482 lines
15 KiB
Python

"""Tests for database initialization and encryption functionality."""
import shutil
import tempfile
import uuid
from pathlib import Path
import pytest
from sqlalchemy import create_engine, event, inspect
from sqlalchemy.exc import OperationalError
from sqlalchemy.orm import sessionmaker
from local_deep_research.database.models import (
Base,
ResearchHistory,
Setting,
User,
)
class TestDatabaseInitialization:
"""Test suite for database initialization and setup."""
@pytest.fixture
def temp_dir(self):
    """Provide a scratch directory for test databases.

    Yields the directory path as a string. Once the requesting test has
    finished, the directory and everything inside it is removed.
    """
    scratch_dir = tempfile.mkdtemp()
    yield scratch_dir
    # Teardown: delete the directory together with any database files.
    shutil.rmtree(scratch_dir)
@pytest.fixture
def db_engine(self, temp_dir):
    """Yield a SQLite engine with all tables created; dispose it afterwards.

    The database file is ``test.db`` inside ``temp_dir``. The engine is
    disposed in a ``finally`` block so that it is released both on normal
    teardown and when table creation fails before the fixture yields
    (code placed after a bare ``yield`` would not run in that case).
    """
    db_path = str(Path(temp_dir) / "test.db")
    engine = create_engine(f"sqlite:///{db_path}")
    try:
        Base.metadata.create_all(engine)
        yield engine
    finally:
        engine.dispose()
@pytest.fixture
def db_session(self, db_engine):
    """Yield a session bound to ``db_engine`` and close it on teardown."""
    session_factory = sessionmaker(bind=db_engine)
    active_session = session_factory()
    yield active_session
    # Teardown: close the session once the test is done with it.
    active_session.close()
def test_basic_database_creation(self, temp_dir):
    """Test creating a basic SQLite database.

    Creates all tables in a fresh file-backed database, then checks that
    the file exists and that the essential tables are present. The engine
    is disposed in ``finally`` so a failing assertion does not leak it.
    """
    db_path = str(Path(temp_dir) / "test.db")
    engine = create_engine(f"sqlite:///{db_path}")
    try:
        Base.metadata.create_all(engine)

        # Verify database file exists
        assert Path(db_path).exists()

        # Verify tables were created
        tables = inspect(engine).get_table_names()

        # Check for essential tables
        essential_tables = (
            "users",
            "research_history",
            "settings",
            "research_resources",
            "token_usage",
            "search_cache",
        )
        for table_name in essential_tables:
            assert table_name in tables, f"Missing table: {table_name}"
    finally:
        engine.dispose()
def test_database_creation_with_function(self, temp_dir):
    """Test database creation through standard SQLAlchemy.

    Creates the schema in ``test_user.db``, checks that the file exists and
    that a trivial ``SELECT 1`` round-trips through a connection. The
    engine is disposed in ``finally`` so a failing assertion does not
    leak it.
    """
    from sqlalchemy import text

    db_path = str(Path(temp_dir) / "test_user.db")

    # Create engine and initialize database
    engine = create_engine(f"sqlite:///{db_path}")
    try:
        Base.metadata.create_all(engine)

        # Verify engine is created
        assert engine is not None

        # Verify database exists
        assert Path(db_path).exists()

        # Test connection
        with engine.connect() as conn:
            result = conn.execute(text("SELECT 1"))
            assert result.fetchone()[0] == 1
    finally:
        engine.dispose()
def test_encrypted_database_creation(self, temp_dir):
    """Test creating an encrypted database with SQLCipher.

    Writes one user through a ``sqlite+pysqlcipher`` engine, then checks
    that reading the same file through a plain SQLite engine raises. The
    test is skipped when ``pysqlcipher3`` cannot be found or when an
    ``ImportError``/``TypeError`` is raised along the way. Both engines are
    disposed in ``finally``; the session is closed even if the write fails.
    """
    db_path = str(Path(temp_dir) / "encrypted.db")
    password = "test_password_123"

    # Try to create encrypted database (will fail if sqlcipher not available)
    engine = None
    bad_engine = None
    try:
        # Check if pysqlcipher3 is available
        import importlib.util

        if importlib.util.find_spec("pysqlcipher3") is None:
            raise ImportError("pysqlcipher3 not available")

        # SQLCipher URI format
        engine = create_engine(
            f"sqlite+pysqlcipher://:{password}@/{db_path}",
            connect_args={"check_same_thread": False, "timeout": 15},
        )

        # NOTE(review): this listener only stores a flag in
        # connection_record.info; whether anything reads it to actually
        # disable regexp is not visible from this file -- confirm.
        @event.listens_for(engine, "connect")
        def do_connect(dbapi_conn, connection_record):
            connection_record.info["regexp"] = False

        # Create tables
        Base.metadata.create_all(engine)

        # Test writing data; close the session even if the write fails so
        # it is not left open on the error path.
        session = sessionmaker(bind=engine)()
        try:
            session.add(User(username="testuser"))
            session.commit()
        finally:
            session.close()

        # Verify database is encrypted by trying to open without password.
        # NOTE(review): because the tuple contains Exception, any exception
        # type satisfies this check.
        bad_engine = create_engine(f"sqlite:///{db_path}")
        with pytest.raises((OperationalError, Exception)):
            with bad_engine.connect() as conn:
                from sqlalchemy import text

                conn.execute(text("SELECT * FROM users"))
    except (ImportError, TypeError) as e:
        # Skip if SQLCipher not available or compatibility issues
        pytest.skip(f"SQLCipher test skipped: {e}")
    finally:
        if bad_engine is not None:
            bad_engine.dispose()
        if engine is not None:
            engine.dispose()
def test_database_schema_completeness(self, db_engine):
    """Test that all expected tables and columns are created.

    For each table listed below, the reflected column names must include
    every expected column (extra columns are allowed). A failure reports
    the table and the columns that are missing.
    """
    inspector = inspect(db_engine)

    expected_columns_by_table = {
        "research_history": {
            "id",
            "query",
            "mode",
            "status",
            "created_at",
            "completed_at",
            "duration_seconds",
            "progress",
            "report_path",
            "report_content",
            "title",
            "progress_log",
            "research_meta",
        },
        "users": {
            "id",
            "username",
            "created_at",
            "last_login",
            "database_version",
        },
        "settings": {
            "id",
            "key",
            "value",
            "type",
            "category",
            "description",
            "name",
            "ui_element",
            "options",
            "min_value",
            "max_value",
            "step",
            "visible",
            "editable",
            "created_at",
            "updated_at",
        },
    }

    for table_name, expected_columns in expected_columns_by_table.items():
        actual_columns = {
            col["name"] for col in inspector.get_columns(table_name)
        }
        missing = expected_columns - actual_columns
        assert not missing, (
            f"Missing columns in {table_name}: {sorted(missing)}"
        )
def test_database_indexes(self, db_engine):
    """Test that reflected indexes and uniqueness rules are sane.

    Checks two things:

    * every column named by an index on ``research_history`` exists on
      that table;
    * ``users.username`` is unique on its own, via a single-column unique
      constraint, unique index, or primary key.

    NOTE(review): which specific columns (e.g. status, created_at) should
    be indexed is not visible from this file, so no particular index is
    required here -- tighten once the intended indexes are confirmed.
    """
    inspector = inspect(db_engine)

    # Check indexes on research_history
    research_columns = {
        col["name"] for col in inspector.get_columns("research_history")
    }
    index_columns = set()
    for idx in inspector.get_indexes("research_history"):
        # Reflection reports None in place of a name for expression-based
        # index elements; those cannot be matched against column names.
        index_columns.update(
            name for name in idx["column_names"] if name is not None
        )
    unknown = index_columns - research_columns
    assert not unknown, f"Indexes reference unknown columns: {sorted(unknown)}"

    # Check unique constraints: collect every column of ``users`` that is
    # unique on its own, whichever way the uniqueness is declared.
    single_column_unique = set()
    for constraint in inspector.get_unique_constraints("users"):
        if len(constraint["column_names"]) == 1:
            single_column_unique.update(constraint["column_names"])
    for idx in inspector.get_indexes("users"):
        if idx.get("unique") and len(idx["column_names"]) == 1:
            single_column_unique.update(idx["column_names"])
    pk_columns = inspector.get_pk_constraint("users").get(
        "constrained_columns", []
    )
    if len(pk_columns) == 1:
        single_column_unique.update(pk_columns)

    assert "username" in single_column_unique, (
        "users.username is expected to be unique"
    )
def test_database_foreign_keys(self, db_engine, db_session):
    """Test that foreign key relationships work correctly.

    Commits a research record, attaches a resource and a token-usage row
    that reference its id, commits again, and checks that both children
    still carry the parent's id.
    """
    from local_deep_research.database.models import (
        ResearchResource,
        TokenUsage,
    )

    # Parent research record
    parent = ResearchHistory(
        id=str(uuid.uuid4()),
        query="Test query",
        mode="quick",
        status="completed",
        created_at="2024-01-01T00:00:00",
    )
    db_session.add(parent)
    db_session.commit()

    # Child rows pointing back at the parent
    linked_resource = ResearchResource(
        research_id=parent.id,
        title="Test Resource",
        url="https://example.com",
        created_at="2024-01-01T00:01:00",
    )
    token_record = TokenUsage(
        research_id=str(parent.id),
        model_provider="openai",
        model_name="gpt-4",
        prompt_tokens=80,
        completion_tokens=20,
        total_tokens=100,
    )
    db_session.add_all([linked_resource, token_record])
    db_session.commit()

    # Verify relationships
    assert linked_resource.research_id == parent.id
    assert token_record.research_id == str(parent.id)
def test_database_cascade_deletes(self, db_engine, db_session):
    """Test cascade delete behavior.

    Stores a benchmark run with three results, deletes the run, and
    checks that no results referencing it remain.
    """
    from local_deep_research.database.models import (
        BenchmarkResult,
        BenchmarkRun,
        DatasetType,
    )

    def count_results_for(run_id):
        """Return how many BenchmarkResult rows reference ``run_id``."""
        matching = db_session.query(BenchmarkResult).filter_by(
            benchmark_run_id=run_id
        )
        return matching.count()

    # Parent benchmark run
    parent_run = BenchmarkRun(
        config_hash="test123",
        query_hash_list=[],
        search_config={},
        evaluation_config={},
        datasets_config={},
    )
    db_session.add(parent_run)
    db_session.commit()

    # Three child results attached to the run
    db_session.add_all(
        [
            BenchmarkResult(
                benchmark_run_id=parent_run.id,
                example_id=f"test_{idx}",
                query_hash=f"hash_{idx}",
                dataset_type=DatasetType.SIMPLEQA,
                question=f"Question {idx}",
                correct_answer=f"Answer {idx}",
            )
            for idx in range(3)
        ]
    )
    db_session.commit()

    # Results exist before the delete
    assert count_results_for(parent_run.id) == 3

    # Delete the run
    db_session.delete(parent_run)
    db_session.commit()

    # Cascade delete removed the children
    assert count_results_for(parent_run.id) == 0
def test_database_transactions(self, db_engine, db_session):
    """Test transaction rollback behavior.

    Commits one user, then attempts a second unit of work containing a
    duplicate username plus a new setting. Any exception raised while
    building or committing that unit triggers a rollback, after which the
    setting must be absent and exactly one user must remain.
    """
    # Baseline: one committed user
    db_session.add(User(username="testuser"))
    db_session.commit()

    # Unit of work that is expected to fail.
    # NOTE(review): the broad ``except Exception`` lets any error type
    # count as the expected failure, not only a unique-constraint
    # violation -- consider narrowing once the failure type is confirmed.
    try:
        # Duplicate username (should fail)
        db_session.add(User(username="testuser"))
        # Otherwise-valid setting queued in the same transaction
        db_session.add(
            Setting(
                key="test.setting",
                value="test_value",
                type="string",
                category="test",
            )
        )
        # This should fail due to unique constraint
        db_session.commit()
    except Exception:
        db_session.rollback()

    # Rollback worked: the setting was never persisted
    settings_query = db_session.query(Setting).filter_by(key="test.setting")
    leftover_settings = settings_query.count()
    assert leftover_settings == 0

    # Original user should still exist
    surviving_users = db_session.query(User).count()
    assert surviving_users == 1
def test_database_performance_with_large_dataset(
    self, db_engine, db_session
):
    """Test database performance with larger datasets.

    Inserts ``research_count`` research records (alternating "completed"
    and "failed"), committing in batches, then checks that a filtered
    count and an ordered, limited query each finish within
    ``max_query_seconds``.
    """
    import time

    research_count = 1000
    batch_size = 100
    max_query_seconds = 0.1

    # Add many research records
    for i in range(research_count):
        research = ResearchHistory(
            id=str(uuid.uuid4()),
            query=f"Test query {i}",
            mode="quick",
            status="completed" if i % 2 == 0 else "failed",
            created_at=f"2024-01-{(i % 28) + 1:02d}T00:00:00",
            duration_seconds=100 + i % 500,
            progress=100 if i % 2 == 0 else 50,
        )
        db_session.add(research)
        # Commit after every full batch. Using (i + 1) avoids committing a
        # one-row "batch" right after the first insert.
        if (i + 1) % batch_size == 0:
            db_session.commit()
    # Flush any remainder when research_count is not a multiple of batch_size
    db_session.commit()

    # Query completed research. perf_counter is monotonic, so the measured
    # interval is not affected by system clock adjustments.
    start = time.perf_counter()
    completed = (
        db_session.query(ResearchHistory)
        .filter_by(status="completed")
        .count()
    )
    query_time = time.perf_counter() - start

    # Records with an even index are the "completed" ones
    assert completed == (research_count + 1) // 2
    assert query_time < max_query_seconds  # Should be fast with indexes

    # Test ordering
    start = time.perf_counter()
    recent = (
        db_session.query(ResearchHistory)
        .order_by(ResearchHistory.created_at.desc())
        .limit(10)
        .all()
    )
    order_time = time.perf_counter() - start

    assert len(recent) == 10
    assert order_time < max_query_seconds
def test_user_specific_database_path(self, temp_dir):
    """Test user-specific database paths for multi-user support.

    Creates one database per user in separate sub-directories, writes one
    research record into each, and checks that each database contains
    only its own record. Sessions and engines are released in ``finally``
    so a failing assertion does not leak them.
    """
    # Test database path generation for different users
    user1_path = str(Path(temp_dir) / "user1" / "user1_encrypted.db")
    user2_path = str(Path(temp_dir) / "user2" / "user2_encrypted.db")

    # Create directories
    Path(user1_path).parent.mkdir(parents=True, exist_ok=True)
    Path(user2_path).parent.mkdir(parents=True, exist_ok=True)

    # Create separate databases
    engine1 = create_engine(f"sqlite:///{user1_path}")
    engine2 = create_engine(f"sqlite:///{user2_path}")
    session1 = None
    session2 = None
    try:
        Base.metadata.create_all(engine1)
        Base.metadata.create_all(engine2)

        # Add data to each
        session1 = sessionmaker(bind=engine1)()
        session2 = sessionmaker(bind=engine2)()

        # User 1 research
        research1 = ResearchHistory(
            id=str(uuid.uuid4()),
            query="User 1 research",
            mode="quick",
            status="completed",
            created_at="2024-01-01T00:00:00",
        )
        session1.add(research1)
        session1.commit()

        # User 2 research
        research2 = ResearchHistory(
            id=str(uuid.uuid4()),
            query="User 2 research",
            mode="quick",
            status="completed",
            created_at="2024-01-01T00:00:00",
        )
        session2.add(research2)
        session2.commit()

        # Verify isolation
        assert session1.query(ResearchHistory).count() == 1
        assert session2.query(ResearchHistory).count() == 1

        # Verify different content
        user1_research = session1.query(ResearchHistory).first()
        user2_research = session2.query(ResearchHistory).first()
        assert user1_research.query == "User 1 research"
        assert user2_research.query == "User 2 research"
    finally:
        # Close sessions before disposing the engines they are bound to.
        for session in (session1, session2):
            if session is not None:
                session.close()
        engine1.dispose()
        engine2.dispose()