diff --git a/.github/workflows/gitnexus-deploy.yml b/.github/workflows/gitnexus-deploy.yml index 986d5b5503..203138c706 100644 --- a/.github/workflows/gitnexus-deploy.yml +++ b/.github/workflows/gitnexus-deploy.yml @@ -247,7 +247,18 @@ jobs: } } - for (const { pr, artifactName, fresh } of prMatches) { + // Cap to the N most recent PR indexes by artifact creation time. + // On a 10GB droplet each index is ~130MB; 3 PRs + main + dev ≈ + // 650MB of index data, leaving headroom for the ~700MB Docker image + // and OS. Older PR indexes are evicted by the prune step. + const MAX_PR_INDEXES = 3; + prMatches.sort( + (a, b) => new Date(b.fresh.created_at) - new Date(a.fresh.created_at), + ); + const keptPrs = prMatches.slice(0, MAX_PR_INDEXES); + const evictedPrs = prMatches.slice(MAX_PR_INDEXES); + + for (const { pr, artifactName, fresh } of keptPrs) { serve.push({ name: `LibreChat-pr-${pr.number}`, artifactName, @@ -255,7 +266,13 @@ jobs: }); core.info(`PR #${pr.number}: run ${fresh.workflow_run.id} -> LibreChat-pr-${pr.number}`); } - core.info(`Resolved ${prMatches.length} PR indexes out of ${openPrs.length} open PRs`); + if (evictedPrs.length) { + core.info( + `Evicted ${evictedPrs.length} older PR indexes (cap=${MAX_PR_INDEXES}): ` + + evictedPrs.map((e) => `#${e.pr.number}`).join(', '), + ); + } + core.info(`Serving ${keptPrs.length} PR indexes out of ${prMatches.length} with artifacts (${openPrs.length} open PRs total)`); if (!serve.length) { core.setFailed('No indexes to serve'); @@ -360,37 +377,22 @@ jobs: .do/gitnexus/Caddyfile \ "$SSH_USER@$SSH_HOST:/opt/gitnexus/" - - name: Rsync indexes and prune stale ones + - name: Prune stale indexes then sync fresh ones env: SSH_USER: ${{ secrets.GITNEXUS_DO_USER }} SSH_HOST: ${{ secrets.GITNEXUS_DO_HOST }} ACTIVE_NAMES: ${{ steps.resolve.outputs.active_names }} run: | set -e - # Push every active index up - for dir in staging/*/; do - [ -d "$dir" ] || continue - name=$(basename "$dir") - echo "Syncing $name" - ssh -i 
~/.ssh/deploy_key "$SSH_USER@$SSH_HOST" \ - "mkdir -p /opt/gitnexus/indexes/$name" - rsync -az --delete -e "ssh -i ~/.ssh/deploy_key" \ - "$dir" \ - "$SSH_USER@$SSH_HOST:/opt/gitnexus/indexes/$name/" - done - - # Prune any folders on the droplet that aren't in the active set. - # This cleans up closed PRs the cleanup workflow might have missed, - # and is safe because main/dev/PR- are always present if active. + # ── Step 1: prune FIRST ──────────────────────────────── + # Remove any folders on the droplet that aren't in the active set. + # This frees disk BEFORE rsyncing new data, which matters on a + # 10GB disk where each index is ~130MB. echo "Pruning stale indexes (keeping: $ACTIVE_NAMES)" ssh -i ~/.ssh/deploy_key "$SSH_USER@$SSH_HOST" \ ACTIVE_NAMES="$ACTIVE_NAMES" bash <<'REMOTE' set -e cd /opt/gitnexus/indexes || exit 0 - # nullglob makes `for dir in */` expand to nothing when the - # directory is empty (first deploy), instead of the literal - # string "*/". Explicit no-op > relying on rm -f to silently - # tolerate a nonexistent file named "*". shopt -s nullglob IFS=',' read -ra ACTIVE <<< "$ACTIVE_NAMES" for dir in */; do @@ -404,8 +406,49 @@ jobs: rm -rf "$dir" fi done + echo "Disk after prune:" + df -h / | tail -1 REMOTE + # ── Step 2: rsync-then-swap ───────────────────────────── + # Upload each index to a temp directory, then swap it into place + # (rm old, then mv new: quick, but not strictly atomic). If rsync fails, the old index survives intact + # and the partial temp dir is cleaned up — no production data + # is lost. The brief period where both old + new exist costs + # ~130MB of extra disk, but the prune step already freed + # space from evicted PR indexes so this fits on a 10GB disk. 
+ for dir in staging/*/; do + [ -d "$dir" ] || continue + name=$(basename "$dir") + echo "Syncing $name (rsync-then-swap)" + ssh -i ~/.ssh/deploy_key "$SSH_USER@$SSH_HOST" \ + "mkdir -p /opt/gitnexus/indexes/${name}.new" + if rsync -az -e "ssh -i ~/.ssh/deploy_key" \ + "$dir" \ + "$SSH_USER@$SSH_HOST:/opt/gitnexus/indexes/${name}.new/"; then + # Swap: remove old, rename new into place + ssh -i ~/.ssh/deploy_key "$SSH_USER@$SSH_HOST" \ + "rm -rf /opt/gitnexus/indexes/$name && mv /opt/gitnexus/indexes/${name}.new /opt/gitnexus/indexes/$name" + echo " $name swapped successfully" + else + # Clean up the partial temp dir + ssh -i ~/.ssh/deploy_key "$SSH_USER@$SSH_HOST" \ + "rm -rf /opt/gitnexus/indexes/${name}.new" + # main/dev are critical — abort the deploy so the failure + # is visible and the container isn't restarted with stale + # or missing data. PR indexes are best-effort. + case "$name" in + LibreChat|LibreChat-dev) + echo "::error::rsync failed for critical index $name — aborting deploy" + exit 1 + ;; + *) + echo "::warning::rsync failed for PR index $name — keeping previous index" + ;; + esac + fi + done + - name: Pull image, restart gitnexus, reload Caddy, wait for healthy env: SSH_USER: ${{ secrets.GITNEXUS_DO_USER }} @@ -414,6 +457,31 @@ jobs: ssh -i ~/.ssh/deploy_key "$SSH_USER@$SSH_HOST" bash <<'REMOTE' set -e cd /opt/gitnexus + + # ── Disk cleanup ────────────────────────────────────── + # Docker accumulates old image layers, dangling images, and + # build cache across deploys. On a 60GB droplet with a 700MB+ + # gitnexus image, this fills the disk after ~40 deploys. NOTE(review): the index-sync comments in this workflow assume a 10GB disk — confirm the droplet size. + # Prune everything not used by currently-running containers + # BEFORE pulling the new image so the extract has room. + echo "Disk before cleanup:" + df -h / | tail -1 + # Omit --volumes: Caddy's caddy-data and caddy-config volumes + # hold TLS certificates and ACME state. 
If Caddy happens to be + # stopped when this runs (the workflow handles that case later), + # --volumes would wipe them, forcing Let's Encrypt re-issuance + # and risking rate-limit lockout (5 certs/domain/week). + docker system prune -af 2>/dev/null || true + echo "Disk after cleanup:" + df -h / | tail -1 + + # Fail fast if disk is critically low even after prune + AVAIL_MB=$(df --output=avail -m / | tail -1 | tr -d ' ') + if [ "$AVAIL_MB" -lt 2048 ]; then + echo "::error::Disk critically low (${AVAIL_MB}MB free). Aborting deploy." + exit 1 + fi + docker compose pull gitnexus docker compose up -d --force-recreate gitnexus diff --git a/.github/workflows/gitnexus-index.yml b/.github/workflows/gitnexus-index.yml index 3b0c052c81..97cbc85f29 100644 --- a/.github/workflows/gitnexus-index.yml +++ b/.github/workflows/gitnexus-index.yml @@ -45,17 +45,18 @@ env: jobs: index: - # Allow push + dispatch unconditionally; filter native pull_request - # events to contributors only. The /gitnexus command workflow does - # its own contributor-commenter check before it dispatches this - # workflow, so workflow_dispatch is always trusted here — including - # the case where the commenter wants to index a non-contributor or - # fork PR (the command uses refs/pull//head so checkout resolves). + # Push + dispatch run unconditionally. Native pull_request events + # are restricted to PRs authored by danny-avila only — this keeps + # automatic CI spend low on a repo with 200+ open PRs. + # + # Other contributors' PRs can still be indexed on demand: + # - /gitnexus index (PR comment command, contributor-gated) + # - workflow_dispatch (manual dispatch from Actions UI) + # Both bypass this filter because they arrive as workflow_dispatch, + # not pull_request. 
if: | github.event_name != 'pull_request' || - github.event.pull_request.author_association == 'OWNER' || - github.event.pull_request.author_association == 'MEMBER' || - github.event.pull_request.author_association == 'COLLABORATOR' + github.event.pull_request.user.login == 'danny-avila' runs-on: ubuntu-latest timeout-minutes: 25 steps: