Merge branch 'fix/security-headers-zap-scan-1041' into feature/comprehensive-security-enhancements

Merges comprehensive security headers implementation from security-headers branch: - SecurityHeaders middleware for HTTP security headers - CORS handling with origin reflection - CSP, X-Frame-Options, HSTS, and other security headers - Removes inline security header code from app_factory - Removes ZAP workflow (replaced by security headers) Conflict resolutions: - Kept our SESSION_COOKIE_SECURE CI detection logic (more secure than always False) - Replaced inline security headers with SecurityHeaders middleware - Updated version to 1.3.0 - Kept our search_engine_github implementation
2026-06-16 03:51:07 +03:00 · 2025-11-13 00:42:10 +01:00
parent be4e991905 4f130b1ad0
commit 7391f36066
277 changed files with 43842 additions and 5978 deletions
--- a/.github/scripts/check-file-writes.sh
+++ b/.github/scripts/check-file-writes.sh
@@ -0,0 +1,220 @@
+#!/bin/bash
+
+# Security check for potential unencrypted file writes to disk
+# This script helps prevent accidentally bypassing encryption at rest
+
+set -e
+
+echo "Checking for potential unencrypted file writes to disk..."
+echo "========================================="
+
+# Patterns that might indicate writing sensitive data to disk
+# Note: Using basic grep patterns without lookaheads
+SUSPICIOUS_PATTERNS=(
+  # Python patterns
+  "\.write\("
+  "\.save\("
+  "\.dump\("
+  "open\(.*['\"]w['\"].*\)"
+  "open\(.*['\"]wb['\"].*\)"
+  "with.*open\(.*['\"]w['\"]"
+  "with.*open\(.*['\"]wb['\"]"
+  "\.to_csv\("
+  "\.to_json\("
+  "\.to_excel\("
+  "\.to_pickle\("
+  "tempfile\.NamedTemporaryFile.*delete=False"
+  "Path.*\.write_text\("
+  "Path.*\.write_bytes\("
+  "shutil\.copy"
+  "shutil\.move"
+  "\.export_to_file\("
+  "\.save_to_file\("
+  "\.write_pdf\("
+  "\.savefig\("
+  # JavaScript patterns
+  "fs\.writeFile"
+  "fs\.writeFileSync"
+  "fs\.createWriteStream"
+  "fs\.appendFile"
+)
+
+# Directories to exclude from checks
+EXCLUDE_DIRS=(
+  "tests"
+  "test"
+  "__pycache__"
+  ".git"
+  "node_modules"
+  ".venv"
+  "venv"
+  "migrations"
+  "static"
+  "vendor"
+  "dist"
+  "build"
+  ".next"
+  "coverage"
+  "examples"
+  "scripts"
+  ".github"
+  "cookiecutter-docker"
+)
+
+# Files to exclude
+EXCLUDE_FILES=(
+  "*_test.py"
+  "test_*.py"
+  "*.test.js"
+  "*.spec.js"
+  "*.test.ts"
+  "*.spec.ts"
+  "setup.py"
+  "webpack.config.js"
+  "**/migrations/*.py"
+  "*.min.js"
+  "*.bundle.js"
+  "*-min.js"
+  "*.min.css"
+)
+
+# Safe keywords that indicate encrypted or safe operations
+# These patterns indicate that file writes have been security-verified
+SAFE_KEYWORDS=(
+  "write_file_verified"
+  "write_json_verified"
+)
+
+# Known safe usage patterns (logs, configs, etc.)
+SAFE_USAGE_PATTERNS=(
+  "security/file_write_verifier.py"
+  "import tempfile"
+  "tempfile\.mkdtemp"
+  "tmp_path"
+  "tmp_file"
+)
+
+# Build exclude arguments for grep
+EXCLUDE_ARGS=""
+for dir in "${EXCLUDE_DIRS[@]}"; do
+  EXCLUDE_ARGS="$EXCLUDE_ARGS --exclude-dir=$dir"
+done
+
+for file in "${EXCLUDE_FILES[@]}"; do
+  EXCLUDE_ARGS="$EXCLUDE_ARGS --exclude=$file"
+done
+
+# Track if we found any issues
+FOUND_ISSUES=0
+ALL_MATCHES=""
+
+echo "Scanning codebase for suspicious patterns..."
+
+# Search only in src/ directory to avoid .venv and other non-source directories
+SEARCH_PATHS="src/"
+
+# Single pass to collect all matches
+for pattern in "${SUSPICIOUS_PATTERNS[@]}"; do
+  # Use grep with binary files excluded and max line length to avoid issues with minified files
+  matches=$(grep -rn -I $EXCLUDE_ARGS -- "$pattern" $SEARCH_PATHS --include="*.py" --include="*.js" --include="*.ts" 2>/dev/null | head -1000 || true)
+  if [ -n "$matches" ]; then
+    ALL_MATCHES="$ALL_MATCHES$matches\n"
+  fi
+done
+
+# Also check for specific problematic patterns in one pass
+temp_matches=$(grep -rn -I $EXCLUDE_ARGS -E "tmp_path|tempfile|/tmp/" $SEARCH_PATHS --include="*.py" 2>/dev/null | head -500 || true)
+if [ -n "$temp_matches" ]; then
+  ALL_MATCHES="$ALL_MATCHES$temp_matches\n"
+fi
+
+db_matches=$(grep -rn -I $EXCLUDE_ARGS -E "report_content.*open|report_content.*write|markdown_content.*open|markdown_content.*write" $SEARCH_PATHS --include="*.py" 2>/dev/null | head -500 || true)
+if [ -n "$db_matches" ]; then
+  ALL_MATCHES="$ALL_MATCHES$db_matches\n"
+fi
+
+export_matches=$(grep -rn -I $EXCLUDE_ARGS -E "export.*Path|export.*path\.open|export.*\.write" $SEARCH_PATHS --include="*.py" 2>/dev/null | head -500 || true)
+if [ -n "$export_matches" ]; then
+  ALL_MATCHES="$ALL_MATCHES$export_matches\n"
+fi
+
+# Now filter all matches at once
+if [ -n "$ALL_MATCHES" ]; then
+  echo "Filtering results for false positives..."
+
+  # Remove duplicates and sort (use tr to handle potential null bytes)
+  ALL_MATCHES=$(echo -e "$ALL_MATCHES" | tr -d '\0' | sort -u)
+
+  filtered_matches=""
+  while IFS= read -r line; do
+    [ -z "$line" ] && continue
+
+    # Check if line contains safe keywords
+    skip_line=0
+    for safe_pattern in "${SAFE_KEYWORDS[@]}"; do
+      if echo "$line" | grep -qE -- "$safe_pattern"; then
+        skip_line=1
+        break
+      fi
+    done
+
+    # Check if line contains safe usage patterns
+    if [ "$skip_line" -eq 0 ]; then
+      for usage_pattern in "${SAFE_USAGE_PATTERNS[@]}"; do
+        if echo "$line" | grep -qE -- "$usage_pattern"; then
+          skip_line=1
+          break
+        fi
+      done
+    fi
+
+    # Additional filters for test/mock files that might not be caught by path exclusion
+    if [ "$skip_line" -eq 0 ]; then
+      if echo "$line" | grep -qE "test|mock|stub" && ! echo "$line" | grep -q "#"; then
+        skip_line=1
+      fi
+    fi
+
+    # Filter system config files (not user data)
+    if [ "$skip_line" -eq 0 ]; then
+      if echo "$line" | grep -qE "web/app_factory\.py|web/server_config\.py|web_search_engines/engines/search_engine_local\.py"; then
+        skip_line=1
+      fi
+    fi
+
+    # Filter safe temp files with proper cleanup
+    if [ "$skip_line" -eq 0 ]; then
+      if echo "$line" | grep -q "database/encrypted_db.py"; then
+        skip_line=1
+      fi
+    fi
+
+    if [ "$skip_line" -eq 0 ]; then
+      filtered_matches="$filtered_matches$line\n"
+      FOUND_ISSUES=1
+    fi
+  done <<< "$ALL_MATCHES"
+
+  if [ -n "$filtered_matches" ] && [ "$FOUND_ISSUES" -eq 1 ]; then
+    echo "⚠️  Found potential unencrypted file writes:"
+    echo "========================================="
+    echo -e "$filtered_matches"
+  fi
+fi
+
+echo "========================================="
+
+if [ $FOUND_ISSUES -eq 1 ]; then
+  echo "❌ Security check failed: Found potential unencrypted file writes"
+  echo ""
+  echo "Please review the above findings and ensure:"
+  echo "1. Sensitive data is not written to disk unencrypted"
+  echo "2. Temporary files are properly cleaned up"
+  echo "3. Use in-memory operations where possible"
+  echo "4. If file writes are necessary, ensure they're encrypted or add '# Safe: <reason>' comment"
+  echo ""
+  echo "For exports, use the in-memory pattern like in export_report_to_memory()"
+  exit 1
+else
+  echo "✅ Security check passed: No suspicious unencrypted file writes detected"
+fi