Merge branch 'fix/security-headers-zap-scan-1041' into feature/comprehensive-security-enhancements

Merges comprehensive security headers implementation from security-headers branch:
- SecurityHeaders middleware for HTTP security headers
- CORS handling with origin reflection
- CSP, X-Frame-Options, HSTS, and other security headers
- Removes inline security header code from app_factory
- Removes ZAP workflow (replaced by security headers)

Conflict resolutions:
- Kept our SESSION_COOKIE_SECURE CI detection logic (more secure than always False)
- Replaced inline security headers with SecurityHeaders middleware
- Updated version to 1.3.0
- Kept our search_engine_github implementation
This commit is contained in:
LearningCircuit
2025-11-13 00:42:10 +01:00
277 changed files with 43842 additions and 5978 deletions

220
.github/scripts/check-file-writes.sh vendored Executable file
View File

@@ -0,0 +1,220 @@
#!/bin/bash
# Security check for potential unencrypted file writes to disk.
# This script helps prevent accidentally bypassing encryption at rest.
#
# Usage:  run from the repository root (the scan targets SEARCH_PATHS).
# Output: progress and findings on stdout, diagnostics on stderr.
# Exit:   0 when no suspicious write survives filtering, 1 otherwise.
set -e

# Patterns that might indicate writing sensitive data to disk.
# These are extended regular expressions (grep -E) without lookaheads:
# "\(" is a literal parenthesis, so they must never be run as basic regex.
SUSPICIOUS_PATTERNS=(
  # Python patterns
  "\.write\("
  "\.save\("
  "\.dump\("
  "open\(.*['\"]w['\"].*\)"
  "open\(.*['\"]wb['\"].*\)"
  "with.*open\(.*['\"]w['\"]"
  "with.*open\(.*['\"]wb['\"]"
  "\.to_csv\("
  "\.to_json\("
  "\.to_excel\("
  "\.to_pickle\("
  "tempfile\.NamedTemporaryFile.*delete=False"
  "Path.*\.write_text\("
  "Path.*\.write_bytes\("
  "shutil\.copy"
  "shutil\.move"
  "\.export_to_file\("
  "\.save_to_file\("
  "\.write_pdf\("
  "\.savefig\("
  # JavaScript patterns
  "fs\.writeFile"
  "fs\.writeFileSync"
  "fs\.createWriteStream"
  "fs\.appendFile"
)

# Directories to exclude from checks
EXCLUDE_DIRS=(
  "tests"
  "test"
  "__pycache__"
  ".git"
  "node_modules"
  ".venv"
  "venv"
  "migrations"
  "static"
  "vendor"
  "dist"
  "build"
  ".next"
  "coverage"
  "examples"
  "scripts"
  ".github"
  "cookiecutter-docker"
)

# Files to exclude
EXCLUDE_FILES=(
  "*_test.py"
  "test_*.py"
  "*.test.js"
  "*.spec.js"
  "*.test.ts"
  "*.spec.ts"
  "setup.py"
  "webpack.config.js"
  "**/migrations/*.py"
  "*.min.js"
  "*.bundle.js"
  "*-min.js"
  "*.min.css"
)

# Safe keywords that indicate encrypted or safe operations.
# These patterns indicate that file writes have been security-verified.
# NOTE(review): the failure message below suggests adding a
# '# Safe: <reason>' comment, but no filter honours that marker - confirm
# whether it should be added here or the message reworded.
SAFE_KEYWORDS=(
  "write_file_verified"
  "write_json_verified"
)

# Known safe usage patterns (logs, configs, etc.)
SAFE_USAGE_PATTERNS=(
  "security/file_write_verifier.py"
  "import tempfile"
  "tempfile\.mkdtemp"
  "tmp_path"
  "tmp_file"
)

# Files whose writes are accepted wholesale.
SAFE_FILE_PATTERNS=(
  # System config files (not user data)
  "web/app_factory\.py"
  "web/server_config\.py"
  "web_search_engines/engines/search_engine_local\.py"
  # Safe temp files with proper cleanup
  "database/encrypted_db.py"
)

# Search only in src/ to avoid .venv and other non-source directories.
SEARCH_PATHS=("src/")

# Exclude options for grep, kept as an array so the globs are never expanded
# or word-split by the shell.
EXCLUDE_ARGS=()
for dir in "${EXCLUDE_DIRS[@]}"; do
  EXCLUDE_ARGS+=("--exclude-dir=$dir")
done
for file in "${EXCLUDE_FILES[@]}"; do
  EXCLUDE_ARGS+=("--exclude=$file")
done

#######################################
# Run one recursive grep over SEARCH_PATHS.
# Globals:   EXCLUDE_ARGS, SEARCH_PATHS (read)
# Arguments: $1 - maximum number of matches to print
#            $2 - extended regular expression to look for
#            $3.. - --include=GLOB options selecting the file types
# Outputs:   "path:line:text" matches on stdout
# Returns:   always 0 (no match and unreadable paths are not errors)
#######################################
scan_pattern() {
  local limit=$1
  local pattern=$2
  shift 2
  # --include options go BEFORE the excludes: GNU grep lets the last matching
  # --include/--exclude win, so this order keeps EXCLUDE_FILES effective.
  # -s hides "no such file" noise but still reports an invalid pattern.
  # -I skips binary files. Options must precede "--" or grep treats them as
  # file names.
  grep -rn -I -s -E "$@" "${EXCLUDE_ARGS[@]}" -e "$pattern" -- "${SEARCH_PATHS[@]}" \
    | head -n "$limit" || true
}

#######################################
# Collect every raw match of the suspicious patterns.
# Globals:   SUSPICIOUS_PATTERNS (read)
# Outputs:   unsorted, possibly duplicated match lines on stdout
#######################################
collect_matches() {
  local pattern
  for pattern in "${SUSPICIOUS_PATTERNS[@]}"; do
    scan_pattern 1000 "$pattern" \
      --include="*.py" --include="*.js" --include="*.ts"
  done
  # Specific problematic patterns, Python only.
  scan_pattern 500 "tmp_path|tempfile|/tmp/" --include="*.py"
  scan_pattern 500 \
    "report_content.*open|report_content.*write|markdown_content.*open|markdown_content.*write" \
    --include="*.py"
  scan_pattern 500 "export.*Path|export.*path\.open|export.*\.write" \
    --include="*.py"
}

#######################################
# Drop known false positives from a stream of match lines.
# Globals:   SAFE_KEYWORDS, SAFE_USAGE_PATTERNS, SAFE_FILE_PATTERNS (read)
# Inputs:    match lines on stdin
# Outputs:   remaining lines on stdout, input order preserved
# Returns:   always 0
#######################################
filter_false_positives() {
  local -a drop_args=(-e '^$')
  local safe
  for safe in "${SAFE_KEYWORDS[@]}" "${SAFE_USAGE_PATTERNS[@]}" \
    "${SAFE_FILE_PATTERNS[@]}"; do
    drop_args+=(-e "$safe")
  done
  # The awk stage handles test/mock files not caught by path exclusion: a
  # line mentioning test, mock or stub is dropped unless it contains "#".
  grep -Ev "${drop_args[@]}" \
    | awk '/#/ || !/test|mock|stub/' \
    || true
}

#######################################
# Entry point: scan, filter, report.
# Returns: 0 when the check passes, 1 when suspicious writes remain
#######################################
main() {
  local path
  local all_matches=""
  local filtered=""

  echo "Checking for potential unencrypted file writes to disk..."
  echo "========================================="
  echo "Scanning codebase for suspicious patterns..."

  for path in "${SEARCH_PATHS[@]}"; do
    [ -e "$path" ] \
      || echo "Warning: search path '$path' not found; run from the repository root" >&2
  done

  # Matches are kept newline-separated and printed with printf so that
  # backslash sequences inside matched source lines are reported verbatim.
  all_matches=$(collect_matches | sort -u)

  if [ -n "$all_matches" ]; then
    echo "Filtering results for false positives..."
    filtered=$(printf '%s\n' "$all_matches" | filter_false_positives)
    if [ -n "$filtered" ]; then
      echo "⚠️ Found potential unencrypted file writes:"
      echo "========================================="
      printf '%s\n\n' "$filtered"
    fi
  fi

  echo "========================================="
  if [ -n "$filtered" ]; then
    echo "❌ Security check failed: Found potential unencrypted file writes"
    echo ""
    echo "Please review the above findings and ensure:"
    echo "1. Sensitive data is not written to disk unencrypted"
    echo "2. Temporary files are properly cleaned up"
    echo "3. Use in-memory operations where possible"
    echo "4. If file writes are necessary, ensure they're encrypted or add '# Safe: <reason>' comment"
    echo ""
    echo "For exports, use the in-memory pattern like in export_report_to_memory()"
    return 1
  fi
  echo "✅ Security check passed: No suspicious unencrypted file writes detected"
}

main "$@"