mirror of
https://github.com/LearningCircuit/local-deep-research.git
synced 2026-06-16 03:51:07 +03:00
Merge branch 'fix/security-headers-zap-scan-1041' into feature/comprehensive-security-enhancements
Merges comprehensive security headers implementation from security-headers branch: - SecurityHeaders middleware for HTTP security headers - CORS handling with origin reflection - CSP, X-Frame-Options, HSTS, and other security headers - Removes inline security header code from app_factory - Removes ZAP workflow (replaced by security headers) Conflict resolutions: - Kept our SESSION_COOKIE_SECURE CI detection logic (more secure than always False) - Replaced inline security headers with SecurityHeaders middleware - Updated version to 1.3.0 - Kept our search_engine_github implementation
This commit is contained in:
220
.github/scripts/check-file-writes.sh
vendored
Executable file
220
.github/scripts/check-file-writes.sh
vendored
Executable file
@@ -0,0 +1,220 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Security check for potential unencrypted file writes to disk
|
||||
# This script helps prevent accidentally bypassing encryption at rest
|
||||
|
||||
set -e
|
||||
|
||||
echo "Checking for potential unencrypted file writes to disk..."
|
||||
echo "========================================="
|
||||
|
||||
# Patterns that might indicate writing sensitive data to disk
|
||||
# Note: Using basic grep patterns without lookaheads
|
||||
SUSPICIOUS_PATTERNS=(
|
||||
# Python patterns
|
||||
"\.write\("
|
||||
"\.save\("
|
||||
"\.dump\("
|
||||
"open\(.*['\"]w['\"].*\)"
|
||||
"open\(.*['\"]wb['\"].*\)"
|
||||
"with.*open\(.*['\"]w['\"]"
|
||||
"with.*open\(.*['\"]wb['\"]"
|
||||
"\.to_csv\("
|
||||
"\.to_json\("
|
||||
"\.to_excel\("
|
||||
"\.to_pickle\("
|
||||
"tempfile\.NamedTemporaryFile.*delete=False"
|
||||
"Path.*\.write_text\("
|
||||
"Path.*\.write_bytes\("
|
||||
"shutil\.copy"
|
||||
"shutil\.move"
|
||||
"\.export_to_file\("
|
||||
"\.save_to_file\("
|
||||
"\.write_pdf\("
|
||||
"\.savefig\("
|
||||
# JavaScript patterns
|
||||
"fs\.writeFile"
|
||||
"fs\.writeFileSync"
|
||||
"fs\.createWriteStream"
|
||||
"fs\.appendFile"
|
||||
)
|
||||
|
||||
# Directories to exclude from checks
|
||||
EXCLUDE_DIRS=(
|
||||
"tests"
|
||||
"test"
|
||||
"__pycache__"
|
||||
".git"
|
||||
"node_modules"
|
||||
".venv"
|
||||
"venv"
|
||||
"migrations"
|
||||
"static"
|
||||
"vendor"
|
||||
"dist"
|
||||
"build"
|
||||
".next"
|
||||
"coverage"
|
||||
"examples"
|
||||
"scripts"
|
||||
".github"
|
||||
"cookiecutter-docker"
|
||||
)
|
||||
|
||||
# Files to exclude
|
||||
EXCLUDE_FILES=(
|
||||
"*_test.py"
|
||||
"test_*.py"
|
||||
"*.test.js"
|
||||
"*.spec.js"
|
||||
"*.test.ts"
|
||||
"*.spec.ts"
|
||||
"setup.py"
|
||||
"webpack.config.js"
|
||||
"**/migrations/*.py"
|
||||
"*.min.js"
|
||||
"*.bundle.js"
|
||||
"*-min.js"
|
||||
"*.min.css"
|
||||
)
|
||||
|
||||
# Safe keywords that indicate encrypted or safe operations
|
||||
# These patterns indicate that file writes have been security-verified
|
||||
SAFE_KEYWORDS=(
|
||||
"write_file_verified"
|
||||
"write_json_verified"
|
||||
)
|
||||
|
||||
# Known safe usage patterns (logs, configs, etc.)
|
||||
SAFE_USAGE_PATTERNS=(
|
||||
"security/file_write_verifier.py"
|
||||
"import tempfile"
|
||||
"tempfile\.mkdtemp"
|
||||
"tmp_path"
|
||||
"tmp_file"
|
||||
)
|
||||
|
||||
# Build exclude arguments for grep
|
||||
EXCLUDE_ARGS=""
|
||||
for dir in "${EXCLUDE_DIRS[@]}"; do
|
||||
EXCLUDE_ARGS="$EXCLUDE_ARGS --exclude-dir=$dir"
|
||||
done
|
||||
|
||||
for file in "${EXCLUDE_FILES[@]}"; do
|
||||
EXCLUDE_ARGS="$EXCLUDE_ARGS --exclude=$file"
|
||||
done
|
||||
|
||||
# Track if we found any issues
|
||||
FOUND_ISSUES=0
|
||||
ALL_MATCHES=""
|
||||
|
||||
echo "Scanning codebase for suspicious patterns..."
|
||||
|
||||
# Search only in src/ directory to avoid .venv and other non-source directories
|
||||
SEARCH_PATHS="src/"
|
||||
|
||||
# Single pass to collect all matches
|
||||
for pattern in "${SUSPICIOUS_PATTERNS[@]}"; do
|
||||
# Use grep with binary files excluded and max line length to avoid issues with minified files
|
||||
matches=$(grep -rn -I $EXCLUDE_ARGS -- "$pattern" $SEARCH_PATHS --include="*.py" --include="*.js" --include="*.ts" 2>/dev/null | head -1000 || true)
|
||||
if [ -n "$matches" ]; then
|
||||
ALL_MATCHES="$ALL_MATCHES$matches\n"
|
||||
fi
|
||||
done
|
||||
|
||||
# Also check for specific problematic patterns in one pass
|
||||
temp_matches=$(grep -rn -I $EXCLUDE_ARGS -E "tmp_path|tempfile|/tmp/" $SEARCH_PATHS --include="*.py" 2>/dev/null | head -500 || true)
|
||||
if [ -n "$temp_matches" ]; then
|
||||
ALL_MATCHES="$ALL_MATCHES$temp_matches\n"
|
||||
fi
|
||||
|
||||
db_matches=$(grep -rn -I $EXCLUDE_ARGS -E "report_content.*open|report_content.*write|markdown_content.*open|markdown_content.*write" $SEARCH_PATHS --include="*.py" 2>/dev/null | head -500 || true)
|
||||
if [ -n "$db_matches" ]; then
|
||||
ALL_MATCHES="$ALL_MATCHES$db_matches\n"
|
||||
fi
|
||||
|
||||
export_matches=$(grep -rn -I $EXCLUDE_ARGS -E "export.*Path|export.*path\.open|export.*\.write" $SEARCH_PATHS --include="*.py" 2>/dev/null | head -500 || true)
|
||||
if [ -n "$export_matches" ]; then
|
||||
ALL_MATCHES="$ALL_MATCHES$export_matches\n"
|
||||
fi
|
||||
|
||||
# Now filter all matches at once
|
||||
if [ -n "$ALL_MATCHES" ]; then
|
||||
echo "Filtering results for false positives..."
|
||||
|
||||
# Remove duplicates and sort (use tr to handle potential null bytes)
|
||||
ALL_MATCHES=$(echo -e "$ALL_MATCHES" | tr -d '\0' | sort -u)
|
||||
|
||||
filtered_matches=""
|
||||
while IFS= read -r line; do
|
||||
[ -z "$line" ] && continue
|
||||
|
||||
# Check if line contains safe keywords
|
||||
skip_line=0
|
||||
for safe_pattern in "${SAFE_KEYWORDS[@]}"; do
|
||||
if echo "$line" | grep -qE -- "$safe_pattern"; then
|
||||
skip_line=1
|
||||
break
|
||||
fi
|
||||
done
|
||||
|
||||
# Check if line contains safe usage patterns
|
||||
if [ "$skip_line" -eq 0 ]; then
|
||||
for usage_pattern in "${SAFE_USAGE_PATTERNS[@]}"; do
|
||||
if echo "$line" | grep -qE -- "$usage_pattern"; then
|
||||
skip_line=1
|
||||
break
|
||||
fi
|
||||
done
|
||||
fi
|
||||
|
||||
# Additional filters for test/mock files that might not be caught by path exclusion
|
||||
if [ "$skip_line" -eq 0 ]; then
|
||||
if echo "$line" | grep -qE "test|mock|stub" && ! echo "$line" | grep -q "#"; then
|
||||
skip_line=1
|
||||
fi
|
||||
fi
|
||||
|
||||
# Filter system config files (not user data)
|
||||
if [ "$skip_line" -eq 0 ]; then
|
||||
if echo "$line" | grep -qE "web/app_factory\.py|web/server_config\.py|web_search_engines/engines/search_engine_local\.py"; then
|
||||
skip_line=1
|
||||
fi
|
||||
fi
|
||||
|
||||
# Filter safe temp files with proper cleanup
|
||||
if [ "$skip_line" -eq 0 ]; then
|
||||
if echo "$line" | grep -q "database/encrypted_db.py"; then
|
||||
skip_line=1
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ "$skip_line" -eq 0 ]; then
|
||||
filtered_matches="$filtered_matches$line\n"
|
||||
FOUND_ISSUES=1
|
||||
fi
|
||||
done <<< "$ALL_MATCHES"
|
||||
|
||||
if [ -n "$filtered_matches" ] && [ "$FOUND_ISSUES" -eq 1 ]; then
|
||||
echo "⚠️ Found potential unencrypted file writes:"
|
||||
echo "========================================="
|
||||
echo -e "$filtered_matches"
|
||||
fi
|
||||
fi
|
||||
|
||||
echo "========================================="
|
||||
|
||||
if [ $FOUND_ISSUES -eq 1 ]; then
|
||||
echo "❌ Security check failed: Found potential unencrypted file writes"
|
||||
echo ""
|
||||
echo "Please review the above findings and ensure:"
|
||||
echo "1. Sensitive data is not written to disk unencrypted"
|
||||
echo "2. Temporary files are properly cleaned up"
|
||||
echo "3. Use in-memory operations where possible"
|
||||
echo "4. If file writes are necessary, ensure they're encrypted or add '# Safe: <reason>' comment"
|
||||
echo ""
|
||||
echo "For exports, use the in-memory pattern like in export_report_to_memory()"
|
||||
exit 1
|
||||
else
|
||||
echo "✅ Security check passed: No suspicious unencrypted file writes detected"
|
||||
fi
|
||||
Reference in New Issue
Block a user