mirror of
https://github.com/LearningCircuit/local-deep-research.git
synced 2026-06-15 19:46:56 +03:00
PR #3635 added a chmod for /install/.venv/.../database/migrations to the production `ldr` stage to satisfy alembic_runner._validate_migrations_permissions, which refuses world-writable migration dirs (pip/pdm umask quirk — pip#8164, conda#12829). The fix only landed in `ldr`; the `ldr-test` stage runs its own `pdm install` and never inherited it, so the test image still ships migrations/versions/ as world-writable. Same PR also dropped the inner try/except around initialize_database(), making migration failures surface as 503 instead of being silently swallowed. The `Create Release` run on main right after #3635 merged hit this on the auth-login Puppeteer shard (run 24932815020): every login attempt 503'd with "Database initialisation failed" because test_admin's DB had tables but no stamped revision, so alembic re-ran on every login → permission check → ValueError. All 3 retry attempts failed identically. Mirror the chmod into ldr-test, right after `pdm install`. Same targeted scope as the production fix — only the migrations subtree, not a blanket venv chmod.
317 lines
13 KiB
Docker
317 lines
13 KiB
Docker
####
# Used for building the LDR service dependencies.
# Shared parent of the `builder` and `ldr-test` stages: provides the SQLCipher
# build toolchain and Node.js for the frontend build.
####
FROM python:3.14.4-slim@sha256:538a18f1db92b4210a0b71aca2d14c156a96dedbe8867465c8ff4dce04d2ec39 AS builder-base

# Set shell to bash with pipefail for safer pipe handling
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Build-time only: keeps apt non-interactive without leaking into runtime env.
ARG DEBIAN_FRONTEND=noninteractive

# Install system dependencies for SQLCipher and Node.js for frontend build.
# Every network-facing step retries (Acquire::Retries for apt, --retry for
# curl) to ride out transient mirror errors during CI.
RUN apt-get update -o Acquire::Retries=3 && apt-get upgrade -y -o Acquire::Retries=3 \
    && apt-get install -y --no-install-recommends -o Acquire::Retries=3 \
        libsqlcipher-dev \
        sqlcipher \
        libsqlcipher1 \
        build-essential \
        pkg-config \
        curl \
        ca-certificates \
        gnupg \
    # Add NodeSource GPG key and repository directly (pinned to Node.js 24.x LTS)
    # GPG key fingerprint verification for supply chain security
    # Key: NSolid <nsolid-gpg@nodesource.com> (RSA 2048-bit, created 2016-05-23)
    # Fingerprint verified from: https://github.com/nodesource/distributions
    # If key rotates, update NODESOURCE_GPG_FINGERPRINT and verify new key at:
    # https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key
    && NODESOURCE_GPG_FINGERPRINT="6F71F525282841EEDAF851B42F59B5F99B1BE0B4" \
    && curl -fsSL --retry 3 https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key -o /tmp/nodesource.gpg.key \
    # Take the first fingerprint record of the downloaded key for comparison.
    && ACTUAL_FINGERPRINT=$(gpg --with-fingerprint --with-colons --show-keys /tmp/nodesource.gpg.key 2>/dev/null | grep "^fpr" | head -1 | cut -d: -f10) \
    && if [ "$ACTUAL_FINGERPRINT" != "$NODESOURCE_GPG_FINGERPRINT" ]; then \
        echo "ERROR: NodeSource GPG key fingerprint mismatch!" >&2; \
        echo "Expected: $NODESOURCE_GPG_FINGERPRINT" >&2; \
        echo "Actual: $ACTUAL_FINGERPRINT" >&2; \
        echo "The NodeSource signing key may have been rotated or compromised." >&2; \
        echo "Verify the new key and update NODESOURCE_GPG_FINGERPRINT if valid." >&2; \
        exit 1; \
    fi \
    && gpg --batch --dearmor -o /usr/share/keyrings/nodesource.gpg /tmp/nodesource.gpg.key \
    && rm /tmp/nodesource.gpg.key \
    && echo "deb [signed-by=/usr/share/keyrings/nodesource.gpg] https://deb.nodesource.com/node_24.x nodistro main" > /etc/apt/sources.list.d/nodesource.list \
    # Second apt pass picks up the NodeSource repository added above; it uses
    # the same retry setting as the first pass.
    && apt-get update -o Acquire::Retries=3 \
    && apt-get install -y --no-install-recommends -o Acquire::Retries=3 nodejs \
    && rm -rf /var/lib/apt/lists/*
|
|
|
|
# Python build tooling, version-pinned for reproducibility and OSSF Scorecard
# compliance. pip is upgraded first, then the remaining tools are installed.
#   - hishel<1.0.0  : required due to https://github.com/pdm-project/pdm/issues/3657
#   - wheel>=0.46.2 : required for the CVE-2026-24049 fix (path traversal)
RUN pip3 install --no-cache-dir pip==26.0 \
    && pip install --no-cache-dir \
        pdm==2.26.2 \
        "hishel<1.0.0" \
        playwright==1.58.0 \
        "wheel>=0.46.2"

# PDM settings:
#   PDM_CHECK_UPDATE=false   - disable the update check
#   PDM_REQUEST_TIMEOUT=120  - raise the request timeout from the default 15s
#                              so large packages (numpy, torch) do not hit
#                              httpcore.ReadTimeout during CI network congestion
ENV PDM_CHECK_UPDATE=false \
    PDM_REQUEST_TIMEOUT=120
|
|
|
|
# Cache-busting build argument: supply a new value when dependencies change.
ARG DEPS_HASH

WORKDIR /install

# Python project metadata and lockfile (changes rarely).
COPY pyproject.toml pdm.lock LICENSE README.md ./

# Frontend build inputs. The lockfile keeps its own instruction because it is
# matched with a trailing wildcard.
COPY package.json vite.config.js ./
COPY package-lock.json* package-lock.json

# Application sources go last because they change most often. The caching
# benefit is limited with the current layout: npm ci, npm run build and
# pdm install all run in the builder stage, which rebuilds whenever
# builder-base changes. The ordering is kept for maintainability.
COPY src/ ./src
|
|
|
|
####
# Builds the LDR service dependencies used in production.
####
FROM builder-base AS builder

# Install npm dependencies, build frontend, and install Python dependencies.
# PDM will automatically select the correct SQLCipher package based on platform.
# Using npm ci for reproducible builds with lockfile integrity verification.
# These RUNs are separate for caching.
RUN npm ci
RUN npm run build

# Retry `pdm install` up to three times to absorb transient network errors.
# The loop records success explicitly: a bare for/if/else loop would end with
# the exit status of `sleep` (0) after the final failed attempt, letting the
# build continue without any dependencies installed.
RUN installed=0; \
    for i in 1 2 3; do \
        if pdm install --prod --no-editable; then \
            installed=1; \
            break; \
        fi; \
        # No point sleeping after the last attempt; fall through to the error.
        if [ "$i" -lt 3 ]; then \
            echo "PDM install attempt $i failed, retrying in 15s..."; \
            sleep 15; \
        fi; \
    done; \
    if [ "$installed" -ne 1 ]; then \
        echo "ERROR: pdm install failed after 3 attempts" >&2; \
        exit 1; \
    fi
|
|
|
|
|
|
####
# Container for running tests.
####
FROM builder-base AS ldr-test

# bash + pipefail is already inherited from builder-base; it is repeated here
# so hadolint's static analysis sees it in this stage.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

ARG DEBIAN_FRONTEND=noninteractive

# Extra runtime packages for the testing tools (Node.js already comes from
# builder-base). Acquire::Retries handles transient Debian mirror errors in CI.
# Listed alphabetically: xauth and xvfb are the X tooling, the remaining
# packages are dependencies for Chromium.
RUN apt-get update -o Acquire::Retries=3 \
    && apt-get upgrade -y -o Acquire::Retries=3 \
    && apt-get install -y --no-install-recommends -o Acquire::Retries=3 \
        fonts-liberation \
        libasound2 \
        libatk-bridge2.0-0 \
        libatk1.0-0 \
        libatspi2.0-0 \
        libcups2 \
        libdbus-1-3 \
        libdrm2 \
        libgbm1 \
        libgtk-3-0 \
        libnspr4 \
        libnss3 \
        libxcomposite1 \
        libxdamage1 \
        libxfixes3 \
        libxkbcommon0 \
        libxrandr2 \
        xauth \
        xdg-utils \
        xvfb \
    && rm -rf /var/lib/apt/lists/*
|
|
|
|
# Puppeteer environment: where its browser cache lives, plus a flag marking
# that the tests run inside Docker.
ENV PUPPETEER_CACHE_DIR=/app/puppeteer-cache \
    DOCKER_ENV=true
# Don't skip Chrome download - let Puppeteer download its own Chrome as fallback
# ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true

# Create the cache directory up front with 755 permissions.
RUN mkdir -p /app/puppeteer-cache \
    && chmod -R 755 /app/puppeteer-cache

# Install Playwright (Python) with Chromium before the npm packages.
# Best effort by design: a failure is only logged, it does not stop the build.
RUN playwright install --with-deps chromium \
    || echo "Playwright install failed, will use Puppeteer's Chrome"
|
|
|
|
# Copy test package files and lockfiles for npm ci
COPY tests/api_tests_with_login/package.json tests/api_tests_with_login/package-lock.json /install/tests/api_tests_with_login/
COPY tests/ui_tests/package.json tests/ui_tests/package-lock.json /install/tests/ui_tests/
COPY tests/accessibility_tests/package.json tests/accessibility_tests/package-lock.json /install/tests/accessibility_tests/

# Install npm packages - Skip Puppeteer Chrome download since we have Playwright's Chrome.
#
# Each suite retries `npm ci` up to three times. Success is tracked in a
# variable and checked after the loop: a bare for/if/else loop would finish
# with the exit status of `sleep` (0) when every attempt failed, producing a
# test image with missing node_modules instead of a failed build.
WORKDIR /install/tests/api_tests_with_login
ENV PUPPETEER_SKIP_DOWNLOAD=true
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true
RUN installed=0; \
    for i in 1 2 3; do \
        if npm ci; then installed=1; break; fi; \
        echo "npm ci attempt $i failed, retrying..."; \
        sleep 5; \
    done; \
    if [ "$installed" -ne 1 ]; then \
        echo "ERROR: npm ci failed after 3 attempts in $PWD" >&2; \
        exit 1; \
    fi

WORKDIR /install/tests/ui_tests
RUN installed=0; \
    for i in 1 2 3; do \
        if npm ci; then installed=1; break; fi; \
        echo "npm ci attempt $i failed, retrying..."; \
        sleep 5; \
    done; \
    if [ "$installed" -ne 1 ]; then \
        echo "ERROR: npm ci failed after 3 attempts in $PWD" >&2; \
        exit 1; \
    fi

WORKDIR /install/tests/accessibility_tests
RUN installed=0; \
    for i in 1 2 3; do \
        if npm ci; then installed=1; break; fi; \
        echo "npm ci attempt $i failed, retrying..."; \
        sleep 5; \
    done; \
    if [ "$installed" -ne 1 ]; then \
        echo "ERROR: npm ci failed after 3 attempts in $PWD" >&2; \
        exit 1; \
    fi
|
|
|
|
# Install the Chromium build that matches the Node.js Playwright package
# (its version may differ from the Python playwright installed earlier).
RUN npx playwright install chromium

# Expose Chrome to Puppeteer/Lighthouse at a stable path. The binary location
# is asked from the Playwright JavaScript API (chromium.executablePath()), so
# no revision-specific directory is hard-coded here. A missing or
# non-executable binary only produces a warning.
RUN CHROME_PATH=$(node -e "console.log(require('playwright-core').chromium.executablePath())") && \
    if [ -z "$CHROME_PATH" ] || [ ! -x "$CHROME_PATH" ]; then \
        echo "WARNING: No Chrome binary found at $CHROME_PATH"; \
    else \
        echo "Symlinking Chrome from: $CHROME_PATH"; \
        ln -sf "$CHROME_PATH" /usr/local/bin/chrome; \
    fi
|
|
|
|
# Point Puppeteer at the Chrome symlink under /usr/local/bin and keep its own
# browser download disabled.
ENV PUPPETEER_SKIP_DOWNLOAD=true \
    PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true \
    PUPPETEER_EXECUTABLE_PATH=/usr/local/bin/chrome

# The tests are run from /app, so copy the prepared test tree there.
RUN mkdir -p /app \
    && cp -r /install/tests /app/

# Ensure everything under the Puppeteer cache directory has 755 permissions.
RUN chmod -R 755 /app/puppeteer-cache

WORKDIR /install
|
|
|
|
# Bring in the Vite build output from the builder stage. styles.css is only
# loaded through Vite (imported in app.js); without dist/ the pages render
# without layout CSS and the accessibility tests fail.
COPY --from=builder /install/src/local_deep_research/web/static/dist/ /install/src/local_deep_research/web/static/dist/

# Install the package with PDM, which picks the SQLCipher package that
# matches the platform.
RUN pdm install --no-editable

# Remove group/other write permission from the installed migrations subtree.
# The same normalisation exists in the `ldr` stage; this stage runs its own
# pdm install rather than copying the venv from `builder`, so it has to repeat
# it. Otherwise _validate_migrations_permissions trips on every login during
# UI tests.
RUN find /install/.venv -type d -path '*/local_deep_research/database/migrations' -print0 \
    | xargs -0 -r chmod -R go-w

# Make the venv's interpreter and console scripts the default.
ENV PATH="/install/.venv/bin:$PATH"

# Note: the test container runs as root because CI workflows mount source
# code volumes that are owned by root. The production container (ldr) runs as
# a non-root user for security.
|
|
|
|
####
# Runs the LDR service.
####
FROM python:3.14.4-slim@sha256:538a18f1db92b4210a0b71aca2d14c156a96dedbe8867465c8ff4dce04d2ec39 AS ldr

# Set shell to bash with pipefail for safer pipe handling
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Build-time only: keeps apt non-interactive without leaking into runtime env.
ARG DEBIAN_FRONTEND=noninteractive

# Upgrade pip to fix CVE-2026-1703 (malicious wheel extraction)
RUN pip3 install --no-cache-dir pip==26.0

# Install runtime dependencies for SQLCipher and WeasyPrint.
# Uses Acquire::Retries like the builder-base and ldr-test stages so a
# transient Debian mirror error does not fail the production image build.
RUN apt-get update -o Acquire::Retries=3 && apt-get upgrade -y -o Acquire::Retries=3 \
    && apt-get install -y --no-install-recommends -o Acquire::Retries=3 \
        sqlcipher \
        libsqlcipher1 \
        # setpriv (from util-linux, already in base image) handles user switching
        # in the entrypoint — no additional package needed
        #
        # WeasyPrint dependencies for PDF generation
        libcairo2 \
        libpango-1.0-0 \
        libpangocairo-1.0-0 \
        libgdk-pixbuf-2.0-0 \
        libffi-dev \
        shared-mime-info \
        # GLib and GObject dependencies (libgobject is included in libglib2.0-0)
        libglib2.0-0 \
    && rm -rf /var/lib/apt/lists/*
|
|
|
|
# Dedicated unprivileged account for the service (security best practice):
# system group `ldruser`, plus a user of the same name with UID 1000 and a
# home directory at /home/ldruser.
RUN groupadd --system ldruser \
    && useradd --system --gid ldruser --uid 1000 --create-home --home-dir /home/ldruser ldruser

# Pre-create the configuration and data directories, hand /app and the home
# directory to ldruser, and set 755 permissions on both trees.
RUN mkdir --parents /app/.config/local_deep_research /home/ldruser/.local/share \
    && chown --recursive ldruser:ldruser /app /home/ldruser \
    && chmod --recursive 755 /app /home/ldruser
|
|
|
|
# Pull the ready-made virtualenv from the builder stage, owned by ldruser,
# and put its bin/ directory first on PATH.
COPY --chown=ldruser:ldruser --from=builder /install/.venv/ /install/.venv
ENV PATH="/install/.venv/bin:$PATH"

# Remove group/other write permission from the migrations subtree.
# alembic_runner._validate_migrations_permissions refuses to run migrations
# when anything under migrations/versions/ is world-writable, and pip/pdm can
# leave permissive modes on directory entries depending on the build host's
# umask (see pip#8164, conda#12829). Without this step a per-user DB silently
# stays at its previous Alembic revision on every login, which shows up
# downstream as e.g. "no such table: papers" on academic-source saves.
# Deliberately limited to the migrations subtree, not a blanket venv chmod.
RUN find /install/.venv -type d -path '*/local_deep_research/database/migrations' -print0 \
    | xargs -0 -r chmod -R go-w

# Smoke-test that the SQLCipher module loads, running as ldruser via setpriv
# so that any __pycache__ files created by the import are owned by ldruser.
# Browser binaries are NOT installed in the production image; Playwright is
# only used for testing (ldr-test stage).
RUN HOME=/home/ldruser setpriv --reuid=ldruser --regid=ldruser --init-groups -- \
    python -c "from local_deep_research.database.sqlcipher_compat import get_sqlcipher_module; sqlcipher = get_sqlcipher_module(); print(f'✓ SQLCipher module loaded successfully: {sqlcipher}')"
|
|
|
|
# Copy the Ollama entrypoint script
COPY --chown=ldruser:ldruser scripts/ollama_entrypoint.sh /scripts/ollama_entrypoint.sh

# Copy LDR entrypoint script to handle volume permissions
COPY scripts/ldr_entrypoint.sh /usr/local/bin/ldr_entrypoint.sh

# COPY --chown sets ownership on copied contents, but Docker auto-creates
# parent dirs (/install, /scripts) as root. Fix with non-recursive chown
# (fast — avoids walking 500MB+ of venv files that are already ldruser-owned).
RUN chmod +x /scripts/ollama_entrypoint.sh \
    && chmod +x /usr/local/bin/ldr_entrypoint.sh \
    && chown ldruser:ldruser /install /scripts

# Volumes are declared only after every build step that writes into them:
# per the Dockerfile reference, build steps that change data inside a volume
# after its VOLUME declaration are discarded, which would drop the script
# copy, chmod and chown above for /scripts.

# Volume for persistent configuration.
# Use /app for configuration to support non-root user.
VOLUME /app/.config/local_deep_research

# Volume for the Ollama start script.
VOLUME /scripts/
|
|
|
|
# The service is stopped with SIGINT rather than the default SIGTERM.
STOPSIGNAL SIGINT

# Port on which the web service listens (documentation only; not published).
EXPOSE 5000

# Liveness probe for container orchestration (Docker, Kubernetes, etc.):
# fetch the health endpoint using only the Python standard library.
HEALTHCHECK \
    --interval=30s \
    --timeout=10s \
    --start-period=60s \
    --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:5000/api/v1/health')" || exit 1

# The entrypoint starts as root to fix volume permissions (/data), then drops
# privileges to ldruser before running the command it receives as "$@".
ENTRYPOINT ["/usr/local/bin/ldr_entrypoint.sh"]

# Default command handed to the entrypoint.
# NOTE(review): `ldr-web` is presumably a console script from the venv on PATH — confirm.
CMD ["ldr-web"]
|