fix: remove naive secret detection from whitelist-check (gitleaks handles this)

The SECRET_VIOLATIONS check was causing false positives: it flagged legitimate code that merely mentions keywords such as 'api_key', 'password', or 'token' (e.g., class attributes like `requires_api_key = False`) rather than actual secret values. Gitleaks already runs as a separate workflow and handles secret detection with context-aware rules that don't produce these false positives.
2026-06-15 19:46:56 +03:00 · 2025-12-05 00:08:31 +01:00
parent 484f32741d
commit 730a17446b
1 changed files with 1 additions and 86 deletions
--- a/.github/scripts/file-whitelist-check.sh
+++ b/.github/scripts/file-whitelist-check.sh
@@ -87,7 +87,6 @@ echo ""
 FILES_CHECKED=0
 WHITELIST_VIOLATIONS=()
 LARGE_FILES=()
-SECRET_VIOLATIONS=()
 BINARY_FILES=()
 SUSPICIOUS_FILES=()
 RESEARCH_DATA_VIOLATIONS=()
@@ -138,61 +137,7 @@ if file "$file" | grep -q "binary"; then
 BINARY_FILES+=("$file")
 fi

-# 4. Secret pattern check in file content - whitelist approach
-if [ -f "$file" ] && [ -r "$file" ]; then
-# Define whitelist patterns for legitimate files that can contain sensitive-looking keywords
-SAFE_FILE_PATTERNS=(
-"src/local_deep_research/metrics/.*\.py$"
-"src/local_deep_research/web_search_engines/.*\.py$"
-"src/local_deep_research/web/services/.*\.py$"
-"src/local_deep_research/utilities/.*\.py$"
-"src/local_deep_research/config/.*\.py$"
-"src/local_deep_research/.*migrate.*\.py$"
-"src/local_deep_research/web/database/.*migration.*\.py$"
-"src/local_deep_research/advanced_search_system/.*\.py$"
-"src/local_deep_research/benchmarks/.*\.py$"
-"src/local_deep_research/web/static/js/components/.*\.js$"
-"src/local_deep_research/database/.*\.py$"
-"src/local_deep_research/web/queue/.*\.py$"
-"src/local_deep_research/api/.*\.py$"
-"src/local_deep_research/settings/.*\.py$"
-"src/local_deep_research/web/auth/.*\.py$"
-"src/local_deep_research/news/.*\.py$"
-"src/local_deep_research/defaults/settings/.*\.json$"
-"src/local_deep_research/defaults/settings_.*\.json$"
-"src/local_deep_research/defaults/llm_providers/.*\.json$"
-"src/local_deep_research/defaults/research_library/.*\.json$"
-"docs/.*\.md$"
-"tests/.*\.py$"
-".*test.*\.py$"
-".*mock.*\.py$"
-".*example.*\.py$"
-"scripts/audit_.*\.py$"
-"\.github/CODEOWNERS$"
-"\.github/workflows/.*\.yml$"
-"github/scripts/.*\.sh$"
-)
-
-# Check if file matches whitelist patterns
-FILE_WHITELISTED=false
-for pattern in "${SAFE_FILE_PATTERNS[@]}"; do
-if echo "$file" | grep -qE "$pattern"; then
-FILE_WHITELISTED=true
-break
-fi
-done
-
-# Only check for secrets if file is not whitelisted
-if [ "$FILE_WHITELISTED" = "false" ]; then
-# Enhanced secret detection with LLM provider keys
-if grep -iE "(api[_-]?key|secret|password|token|private[_-]?key|sk-[a-zA-Z0-9]{20,}|claude-[a-zA-Z0-9]{20,}|AIzaSy[a-zA-Z0-9_-]{33})" "$file" >/dev/null 2>&1; then
-# Additional check for obvious false positives
-if ! grep -iE "(example|sample|test|mock|placeholder|<.*>|\{\{.*\}\})" "$file" >/dev/null 2>&1; then
-SECRET_VIOLATIONS+=("$file")
-fi
-fi
-fi
-fi
+# 4. Secret pattern check - REMOVED: gitleaks workflow handles this more accurately

 # 5. Suspicious filename patterns - whitelist approach
 SAFE_FILENAME_PATTERNS=(
@@ -327,7 +272,6 @@ echo "📋 Summary of findings:"
 echo "   - File type violations: ${#WHITELIST_VIOLATIONS[@]}"
 echo "   - Large files: ${#LARGE_FILES[@]}"
 echo "   - Binary files: ${#BINARY_FILES[@]}"
-echo "   - Potential secrets: ${#SECRET_VIOLATIONS[@]}"
 echo "   - Suspicious filenames: ${#SUSPICIOUS_FILES[@]}"
 echo "   - Research data leaks: ${#RESEARCH_DATA_VIOLATIONS[@]}"
 echo "   - Hardcoded Flask secrets: ${#FLASK_SECRET_VIOLATIONS[@]}"
@@ -394,35 +338,6 @@ echo ""
 done
 fi

-if [ ${#SECRET_VIOLATIONS[@]} -gt 0 ]; then
-echo ""
-echo "❌ POTENTIAL SECRETS IN FILE CONTENT - Suspicious patterns found:"
-echo "   These files contain keywords like 'api_key', 'secret', 'password', 'token' but"
-echo "   are not in the whitelisted safe directories. Review them carefully!"
-echo ""
-for violation in "${SECRET_VIOLATIONS[@]}"; do
-echo "  🔍 EXAMINING: $violation"
-
-# Show the specific lines that triggered the detection
-echo "     → Suspicious content found:"
-grep -n -iE "(api[_-]?key|secret|password|token|private[_-]?key)" "$violation" 2>/dev/null | head -5 | while read -r line; do
-echo "       $line"
-done
-
-# Show file type and size for context
-if [ -f "$violation" ]; then
-FILE_TYPE=$(file -b "$violation" 2>/dev/null || echo "unknown")
-FILE_SIZE=$(stat -c%s "$violation" 2>/dev/null || echo "unknown")
-echo "     → File info: $FILE_TYPE (${FILE_SIZE} bytes)"
-fi
-
-echo "     → Issue: Contains sensitive-looking keywords outside whitelisted areas"
-echo "     → Fix: Either add to SAFE_FILE_PATTERNS whitelist or remove secrets"
-echo ""
-done
-TOTAL_VIOLATIONS=$((TOTAL_VIOLATIONS + ${#SECRET_VIOLATIONS[@]}))
-fi
-
 if [ ${#SUSPICIOUS_FILES[@]} -gt 0 ]; then
 echo ""
 echo "❌ SUSPICIOUS FILENAMES - Files with security-sensitive names:"