Files
local-deep-research/Dockerfile
LearningCircuit f35e91de8d fix(docker): chmod migrations dir in ldr-test stage too (#3650)
PR #3635 added a chmod for /install/.venv/.../database/migrations to the
production `ldr` stage to satisfy
alembic_runner._validate_migrations_permissions, which refuses
world-writable migration dirs (pip/pdm umask quirk — pip#8164,
conda#12829). The fix only landed in `ldr`; the `ldr-test` stage runs its
own `pdm install` and never inherited it, so the test image still ships
migrations/versions/ as world-writable.

Same PR also dropped the inner try/except around initialize_database(),
making migration failures surface as 503 instead of being silently
swallowed. The `Create Release` run on main right after #3635 merged
hit this on the auth-login Puppeteer shard (run 24932815020): every
login attempt 503'd with "Database initialisation failed" because
test_admin's DB had tables but no stamped revision, so alembic re-ran
on every login → permission check → ValueError. All 3 retry attempts
failed identically.

Mirror the chmod into ldr-test, right after `pdm install`. Same
targeted scope as the production fix — only the migrations subtree,
not a blanket venv chmod.
2026-04-25 19:23:17 +02:00

317 lines
13 KiB
Docker

####
# Used for building the LDR service dependencies.
# Both the `builder` and `ldr-test` stages derive from this stage.
####
FROM python:3.14.4-slim@sha256:538a18f1db92b4210a0b71aca2d14c156a96dedbe8867465c8ff4dce04d2ec39 AS builder-base
# Set shell to bash with pipefail for safer pipe handling
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
# Build-time only (ARG, not ENV), so it does not leak into the runtime env.
ARG DEBIAN_FRONTEND=noninteractive
# Install system dependencies for SQLCipher and Node.js for frontend build.
# Every apt-get network operation uses Acquire::Retries to handle transient
# mirror errors during CI -- including the second update/install pair that
# talks to the NodeSource repository.
RUN apt-get update -o Acquire::Retries=3 \
    && apt-get upgrade -y -o Acquire::Retries=3 \
    && apt-get install -y --no-install-recommends -o Acquire::Retries=3 \
        libsqlcipher-dev \
        sqlcipher \
        libsqlcipher1 \
        build-essential \
        pkg-config \
        curl \
        ca-certificates \
        gnupg \
    # Add NodeSource GPG key and repository directly (pinned to Node.js 24.x LTS)
    # GPG key fingerprint verification for supply chain security
    # Key: NSolid <nsolid-gpg@nodesource.com> (RSA 2048-bit, created 2016-05-23)
    # Fingerprint verified from: https://github.com/nodesource/distributions
    # If key rotates, update NODESOURCE_GPG_FINGERPRINT and verify new key at:
    # https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key
    && NODESOURCE_GPG_FINGERPRINT="6F71F525282841EEDAF851B42F59B5F99B1BE0B4" \
    && curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key -o /tmp/nodesource.gpg.key \
    && ACTUAL_FINGERPRINT=$(gpg --with-fingerprint --with-colons --show-keys /tmp/nodesource.gpg.key 2>/dev/null | grep "^fpr" | head -1 | cut -d: -f10) \
    && if [ "$ACTUAL_FINGERPRINT" != "$NODESOURCE_GPG_FINGERPRINT" ]; then \
        echo "ERROR: NodeSource GPG key fingerprint mismatch!" >&2; \
        echo "Expected: $NODESOURCE_GPG_FINGERPRINT" >&2; \
        echo "Actual: $ACTUAL_FINGERPRINT" >&2; \
        echo "The NodeSource signing key may have been rotated or compromised." >&2; \
        echo "Verify the new key and update NODESOURCE_GPG_FINGERPRINT if valid." >&2; \
        exit 1; \
    fi \
    # Only a key that passed the fingerprint check is installed as a keyring.
    && gpg --batch --dearmor -o /usr/share/keyrings/nodesource.gpg /tmp/nodesource.gpg.key \
    && rm /tmp/nodesource.gpg.key \
    && echo "deb [signed-by=/usr/share/keyrings/nodesource.gpg] https://deb.nodesource.com/node_24.x nodistro main" > /etc/apt/sources.list.d/nodesource.list \
    && apt-get update -o Acquire::Retries=3 \
    && apt-get install -y --no-install-recommends -o Acquire::Retries=3 nodejs \
    # Drop the apt package lists in the same layer that created them.
    && rm -rf /var/lib/apt/lists/*
# Python build tooling, pinned to exact versions for reproducibility and
# OSSF Scorecard compliance. pip itself is upgraded first so that the second
# install runs under the pinned pip.
#   - hishel<1.0.0  : required due to https://github.com/pdm-project/pdm/issues/3657
#   - wheel>=0.46.2 : required for the CVE-2026-24049 fix (path traversal)
RUN pip3 install --no-cache-dir pip==26.0 \
    && pip install --no-cache-dir \
        pdm==2.26.2 \
        "hishel<1.0.0" \
        playwright==1.58.0 \
        "wheel>=0.46.2"
# PDM settings for the build:
#   PDM_CHECK_UPDATE=false  - disable the update check
#   PDM_REQUEST_TIMEOUT=120 - raise the request timeout from the default 15s
#                             to 120s for large packages (numpy, torch); helps
#                             prevent httpcore.ReadTimeout errors during CI
#                             network congestion
ENV PDM_CHECK_UPDATE=false \
    PDM_REQUEST_TIMEOUT=120
# Build argument to invalidate cache when dependencies change
ARG DEPS_HASH
WORKDIR /install
# Python project metadata and lockfile first (changes rarely)
COPY pyproject.toml pdm.lock LICENSE README.md ./
# Frontend build files
COPY package.json vite.config.js ./
COPY package-lock.json* package-lock.json
# Application source goes last because it changes most frequently. With the
# current layout the caching benefit is limited: every RUN that consumes these
# files (npm ci, npm run build, pdm install) lives in the builder stage, which
# rebuilds whenever builder-base changes. The ordering is kept as good
# practice for Dockerfile maintainability.
COPY src/ src
####
# Builds the LDR service dependencies used in production.
####
FROM builder-base AS builder
# Install npm dependencies, build frontend, and install Python dependencies
# PDM will automatically select the correct SQLCipher package based on platform
# Using npm ci for reproducible builds with lockfile integrity verification
# These RUNs are separate for caching
RUN npm ci
RUN npm run build
# Retry pdm install up to three times to ride out transient network errors.
# The RUN must fail when every attempt fails: a bare `for` loop returns the
# status of its last command (the successful `sleep`), which would let the
# build continue with a missing or partial venv. Exit 0 explicitly on success
# and exit 1 once the attempts are exhausted.
RUN for i in 1 2 3; do \
        if pdm install --prod --no-editable; then \
            exit 0; \
        fi; \
        if [ "$i" -lt 3 ]; then \
            echo "PDM install attempt $i failed, retrying in 15s..."; \
            sleep 15; \
        fi; \
    done; \
    echo "ERROR: pdm install failed after 3 attempts" >&2; \
    exit 1
####
# Container for running tests.
####
FROM builder-base AS ldr-test
# bash + pipefail is already inherited from builder-base; it is restated here
# so that hadolint's static analysis sees it in this stage as well.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
ARG DEBIAN_FRONTEND=noninteractive
# Extra runtime packages for the test tooling: a virtual X server (xauth,
# xvfb) and the shared libraries Chromium needs. Node.js is already provided
# by builder-base. Acquire::Retries handles transient Debian mirror errors
# during CI.
RUN apt-get update -o Acquire::Retries=3 \
    && apt-get upgrade -y -o Acquire::Retries=3 \
    && apt-get install -y --no-install-recommends -o Acquire::Retries=3 \
        fonts-liberation \
        libasound2 \
        libatk-bridge2.0-0 \
        libatk1.0-0 \
        libatspi2.0-0 \
        libcups2 \
        libdbus-1-3 \
        libdrm2 \
        libgbm1 \
        libgtk-3-0 \
        libnspr4 \
        libnss3 \
        libxcomposite1 \
        libxdamage1 \
        libxfixes3 \
        libxkbcommon0 \
        libxrandr2 \
        xauth \
        xdg-utils \
        xvfb \
    && rm -rf /var/lib/apt/lists/*
# Puppeteer environment for the test image
ENV PUPPETEER_CACHE_DIR=/app/puppeteer-cache \
    DOCKER_ENV=true
# The Chromium download is deliberately not skipped at this point, so that
# Puppeteer can download its own Chrome as a fallback:
# ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true
# Puppeteer cache directory, readable and traversable by all users
RUN mkdir -p /app/puppeteer-cache \
    && chmod -R 755 /app/puppeteer-cache
# Python Playwright + Chromium go in before the npm packages. Best effort:
# a failure here only prints a notice and the build carries on.
RUN playwright install --with-deps chromium \
    || echo "Playwright install failed, will use Puppeteer's Chrome"
# Copy test package files and lockfiles for npm ci
COPY tests/api_tests_with_login/package.json tests/api_tests_with_login/package-lock.json /install/tests/api_tests_with_login/
COPY tests/ui_tests/package.json tests/ui_tests/package-lock.json /install/tests/ui_tests/
COPY tests/accessibility_tests/package.json tests/accessibility_tests/package-lock.json /install/tests/accessibility_tests/
# Install npm packages - Skip Puppeteer Chrome download since we have Playwright's Chrome
ENV PUPPETEER_SKIP_DOWNLOAD=true
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true
# Each suite gets up to three `npm ci` attempts to absorb transient registry
# errors. The RUN exits 0 as soon as one attempt succeeds and exits 1 when all
# of them fail; without the explicit exit the loop would return the status of
# the final `sleep` and the image would be built without node_modules.
WORKDIR /install/tests/api_tests_with_login
RUN for i in 1 2 3; do \
        if npm ci; then \
            exit 0; \
        fi; \
        if [ "$i" -lt 3 ]; then \
            echo "npm ci attempt $i failed, retrying..."; \
            sleep 5; \
        fi; \
    done; \
    echo "ERROR: npm ci failed after 3 attempts in $PWD" >&2; \
    exit 1
WORKDIR /install/tests/ui_tests
RUN for i in 1 2 3; do \
        if npm ci; then \
            exit 0; \
        fi; \
        if [ "$i" -lt 3 ]; then \
            echo "npm ci attempt $i failed, retrying..."; \
            sleep 5; \
        fi; \
    done; \
    echo "ERROR: npm ci failed after 3 attempts in $PWD" >&2; \
    exit 1
WORKDIR /install/tests/accessibility_tests
RUN for i in 1 2 3; do \
        if npm ci; then \
            exit 0; \
        fi; \
        if [ "$i" -lt 3 ]; then \
            echo "npm ci attempt $i failed, retrying..."; \
            sleep 5; \
        fi; \
    done; \
    echo "ERROR: npm ci failed after 3 attempts in $PWD" >&2; \
    exit 1
# Browsers for the Node.js Playwright package (its version may differ from the
# Python playwright installed earlier).
RUN npx playwright install chromium
# Expose Chrome at a stable path (/usr/local/bin/chrome) for Puppeteer and
# Lighthouse. The binary location is asked of the installed Node.js Playwright
# itself via chromium.executablePath(), so no revision-specific directory name
# is hard-coded here. A missing or non-executable binary only produces a
# warning; it does not fail the build.
RUN CHROME_PATH=$(node -e "console.log(require('playwright-core').chromium.executablePath())") \
    && if [ -z "$CHROME_PATH" ] || [ ! -x "$CHROME_PATH" ]; then \
        echo "WARNING: No Chrome binary found at $CHROME_PATH"; \
    else \
        echo "Symlinking Chrome from: $CHROME_PATH"; \
        ln -sf "$CHROME_PATH" /usr/local/bin/chrome; \
    fi
# Point Puppeteer at the /usr/local/bin/chrome symlink and keep its own
# browser download disabled.
ENV PUPPETEER_SKIP_DOWNLOAD=true \
    PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true \
    PUPPETEER_EXECUTABLE_PATH=/usr/local/bin/chrome
# Tests are run from /app, so place a copy of the test tree there, then
# re-apply 755 to the Puppeteer cache so any Chrome binaries in it have
# correct permissions.
RUN mkdir -p /app \
    && cp -r /install/tests /app/ \
    && chmod -R 755 /app/puppeteer-cache
WORKDIR /install
# Bring in the Vite build output from the builder stage so the bundled CSS/JS
# exist in the test image. styles.css is only loaded through Vite (it is
# imported in app.js); without dist/ the page renders with no layout CSS and
# the a11y tests fail.
COPY --from=builder \
    /install/src/local_deep_research/web/static/dist/ \
    /install/src/local_deep_research/web/static/dist/
# Install the package with PDM, which picks the correct SQLCipher package for
# the platform automatically.
RUN pdm install --no-editable
# Same permission normalisation as the `ldr` stage (see the comment there).
# This stage runs its own pdm install rather than COPYing the venv from
# `builder`, so it does not inherit that fix and would otherwise trip
# _validate_migrations_permissions on every login during UI tests.
RUN find /install/.venv \
        -type d \
        -path '*/local_deep_research/database/migrations' \
        -exec chmod -R go-w {} +
# Make the venv's python the default one on PATH.
ENV PATH="/install/.venv/bin:$PATH"
# Note: the test container runs as root because CI workflows mount source code
# volumes that are owned by root. The production container (ldr) runs as a
# non-root user for security.
####
# Runs the LDR service.
####
FROM python:3.14.4-slim@sha256:538a18f1db92b4210a0b71aca2d14c156a96dedbe8867465c8ff4dce04d2ec39 AS ldr
# Set shell to bash with pipefail for safer pipe handling
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
# Build-time only (ARG, not ENV), so it does not leak into the runtime env.
ARG DEBIAN_FRONTEND=noninteractive
# Upgrade pip to fix CVE-2026-1703 (malicious wheel extraction)
RUN pip3 install --no-cache-dir pip==26.0
# Install runtime dependencies for SQLCipher and WeasyPrint.
# Acquire::Retries matches the other stages so that a transient Debian mirror
# error does not fail the production image build either.
RUN apt-get update -o Acquire::Retries=3 \
    && apt-get upgrade -y -o Acquire::Retries=3 \
    && apt-get install -y --no-install-recommends -o Acquire::Retries=3 \
        sqlcipher \
        libsqlcipher1 \
        # setpriv (from util-linux, already in base image) handles user switching
        # in the entrypoint — no additional package needed
        #
        # WeasyPrint dependencies for PDF generation
        libcairo2 \
        libpango-1.0-0 \
        libpangocairo-1.0-0 \
        libgdk-pixbuf-2.0-0 \
        libffi-dev \
        shared-mime-info \
        # GLib and GObject dependencies (libgobject is included in libglib2.0-0)
        libglib2.0-0 \
    # Drop the apt package lists in the same layer that created them.
    && rm -rf /var/lib/apt/lists/*
# Dedicated non-root system account (UID 1000, home /home/ldruser) that the
# service runs as (security best practice).
RUN groupadd --system ldruser \
    && useradd --system --gid ldruser --uid 1000 --create-home --home-dir /home/ldruser ldruser
# Config and data directories, owned by and accessible to the non-root user
RUN mkdir -p \
        /app/.config/local_deep_research \
        /home/ldruser/.local/share \
    && chown -R ldruser:ldruser /app /home/ldruser \
    && chmod -R 755 /app /home/ldruser
# Pull the prepared virtualenv out of the build stage, already owned by
# ldruser, and put its bin/ first on PATH.
COPY --from=builder --chown=ldruser:ldruser /install/.venv/ /install/.venv
ENV PATH="/install/.venv/bin:$PATH"
# Remove group/other write permission from the migrations subtree.
# alembic_runner._validate_migrations_permissions refuses to run migrations
# when anything under migrations/versions/ is world-writable, and pip/pdm can
# leave permissive modes on the directory entries depending on the build
# host's umask (see pip#8164, conda#12829). Without this normalisation a
# per-user DB silently stays at its previous Alembic revision on every login,
# which shows up downstream as e.g. "no such table: papers" on academic-source
# saves. Only the migrations subtree is touched; a blanket chmod of the venv
# is deliberately avoided.
RUN find /install/.venv \
        -type d \
        -path '*/local_deep_research/database/migrations' \
        -exec chmod -R go-w {} +
# Smoke-test the SQLCipher import as ldruser (via setpriv) instead of root, so
# that any __pycache__ files Python writes during the import belong to
# ldruser. No browser binaries are installed in the production image:
# Playwright is only used for testing (ldr-test stage).
RUN HOME=/home/ldruser \
    setpriv --reuid=ldruser --regid=ldruser --init-groups -- \
    python -c "from local_deep_research.database.sqlcipher_compat import get_sqlcipher_module; sqlcipher = get_sqlcipher_module(); print(f'✓ SQLCipher module loaded successfully: {sqlcipher}')"
# Copy the Ollama entrypoint script
COPY --chown=ldruser:ldruser scripts/ollama_entrypoint.sh /scripts/ollama_entrypoint.sh
# Copy LDR entrypoint script to handle volume permissions
COPY scripts/ldr_entrypoint.sh /usr/local/bin/ldr_entrypoint.sh
# COPY --chown sets ownership on copied contents, but Docker auto-creates
# parent dirs (/install, /scripts) as root. Fix with non-recursive chown
# (fast — avoids walking 500MB+ of venv files that are already ldruser-owned).
RUN chmod +x /scripts/ollama_entrypoint.sh \
    && chmod +x /usr/local/bin/ldr_entrypoint.sh \
    && chown ldruser:ldruser /install /scripts
# Declare the volumes only after everything under them has been written.
# With the legacy builder, build steps that modify a path after its VOLUME
# declaration are discarded, which would drop the chmod/chown on /scripts.
# Volume for persistent configuration (/app is used so the non-root user can
# write to it)
VOLUME /app/.config/local_deep_research
# Volume for the Ollama start script
VOLUME /scripts/
# Port the service listens on (documentation only; does not publish the port)
EXPOSE 5000
# Liveness probe for container orchestrators (Docker, Kubernetes, etc.):
# requests the health endpoint with the Python standard library and reports
# the container unhealthy when the request fails.
HEALTHCHECK --start-period=60s --interval=30s --timeout=10s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:5000/api/v1/health')" || exit 1
STOPSIGNAL SIGINT
# The entrypoint script starts as root so it can fix volume permissions and is
# expected to drop to ldruser afterwards (NOTE(review): behaviour lives in
# scripts/ldr_entrypoint.sh, which is not visible here).
ENTRYPOINT ["/usr/local/bin/ldr_entrypoint.sh"]
# Default command; Docker passes it to the entrypoint as its arguments ($@)
CMD ["ldr-web"]