mirror of
https://github.com/LearningCircuit/local-deep-research.git
synced 2026-06-15 19:46:56 +03:00
improve: address AI code review suggestions for security script
- Replace DEBUG output with informative result summaries - Fix file processing loop to handle filenames with spaces/special chars using printf - Improve readability of security scan results with better formatting - Maintain helpful output while removing debug terminology These improvements make the script more robust and user-friendly while maintaining all security checking functionality.
This commit is contained in:
62
.github/scripts/file-whitelist-check.sh
vendored
62
.github/scripts/file-whitelist-check.sh
vendored
@@ -48,39 +48,32 @@ ALLOWED_PATTERNS=(
|
||||
)
|
||||
|
||||
# Get list of files to check
|
||||
echo "🔧 DEBUG: GITHUB_EVENT_NAME=$GITHUB_EVENT_NAME"
|
||||
echo "🔧 DEBUG: GITHUB_BASE_REF=$GITHUB_BASE_REF"
|
||||
|
||||
if [ "$GITHUB_EVENT_NAME" = "pull_request" ]; then
|
||||
# For PRs: check all files that would be added/modified in the entire PR
|
||||
echo "🔍 Checking all files in PR commits from $GITHUB_BASE_REF to HEAD..."
|
||||
echo "🔍 Checking files in PR from $GITHUB_BASE_REF to HEAD..."
|
||||
|
||||
echo "🔧 DEBUG: Running git diff command..."
|
||||
CHANGED_FILES=$(git diff --name-only --diff-filter=AM origin/$GITHUB_BASE_REF..HEAD)
|
||||
echo "🔧 DEBUG: git diff found $(echo "$CHANGED_FILES" | wc -l) files"
|
||||
FILE_COUNT=$(echo "$CHANGED_FILES" | wc -l)
|
||||
echo "📋 Found $FILE_COUNT changed files with git diff"
|
||||
|
||||
echo "🔧 DEBUG: Running git log command..."
|
||||
# Also get newly added files across all commits in the PR
|
||||
# Use a more robust approach that handles edge cases
|
||||
ALL_NEW_FILES=$(git log --name-only --pretty=format: --diff-filter=A origin/$GITHUB_BASE_REF..HEAD 2>/dev/null | grep -v '^$' | sort | uniq || echo "")
|
||||
echo "🔧 DEBUG: git log found $(echo "$ALL_NEW_FILES" | wc -w) files"
|
||||
NEW_FILE_COUNT=$(echo "$ALL_NEW_FILES" | wc -w)
|
||||
echo "📋 Found $NEW_FILE_COUNT newly added files with git log"
|
||||
|
||||
# Combine both lists and remove duplicates - handle empty ALL_NEW_FILES
|
||||
if [ -n "$ALL_NEW_FILES" ]; then
|
||||
CHANGED_FILES=$(echo -e "$CHANGED_FILES\n$ALL_NEW_FILES" | sort | uniq | grep -v '^$')
|
||||
fi
|
||||
echo "🔧 DEBUG: Combined list has $(echo "$CHANGED_FILES" | wc -l) files"
|
||||
TOTAL_FILE_COUNT=$(echo "$CHANGED_FILES" | wc -l)
|
||||
echo "📋 Total unique files to check: $TOTAL_FILE_COUNT"
|
||||
else
|
||||
# For direct pushes: check files in the current commit
|
||||
echo "🔧 DEBUG: Direct push mode - checking HEAD~1..HEAD"
|
||||
echo "🔍 Checking files in latest commit..."
|
||||
CHANGED_FILES=$(git diff --name-only --diff-filter=AM HEAD~1..HEAD)
|
||||
echo "🔧 DEBUG: Found $(echo "$CHANGED_FILES" | wc -l) files in direct push"
|
||||
fi
|
||||
|
||||
echo "🔧 DEBUG: Files to check:"
|
||||
echo "$CHANGED_FILES" | head -10
|
||||
if [ $(echo "$CHANGED_FILES" | wc -l) -gt 10 ]; then
|
||||
echo "🔧 DEBUG: ... and $(echo "$CHANGED_FILES" | wc -l | awk '{print $1-10}') more files"
|
||||
TOTAL_FILE_COUNT=$(echo "$CHANGED_FILES" | wc -l)
|
||||
echo "📋 Found $TOTAL_FILE_COUNT files in direct push"
|
||||
fi
|
||||
|
||||
echo "🔍 Running comprehensive security checks..."
|
||||
@@ -99,16 +92,12 @@ HARDCODED_PATH_VIOLATIONS=()
|
||||
HARDCODED_IP_VIOLATIONS=()
|
||||
SUSPICIOUS_FILETYPE_VIOLATIONS=()
|
||||
|
||||
echo "🔧 DEBUG: Starting file processing loop..."
|
||||
FILE_COUNT=0
|
||||
while IFS= read -r file; do
|
||||
# Use improved file processing that handles spaces and special characters
|
||||
printf '%s\n' "$CHANGED_FILES" | while IFS= read -r file; do
|
||||
[ -z "$file" ] && continue
|
||||
FILE_COUNT=$((FILE_COUNT + 1))
|
||||
echo "🔧 DEBUG: Processing file $FILE_COUNT: $file"
|
||||
|
||||
# Skip deleted files
|
||||
if [ ! -f "$file" ]; then
|
||||
echo "🔧 DEBUG: File $file does not exist, skipping"
|
||||
continue
|
||||
fi
|
||||
|
||||
@@ -319,19 +308,20 @@ fi
|
||||
done <<< "$CHANGED_FILES"
|
||||
|
||||
# Report all violations with detailed explanations
|
||||
echo "🔧 DEBUG: File processing completed. Checking violations..."
|
||||
echo "🔧 DEBUG: Whitelist violations: ${#WHITELIST_VIOLATIONS[@]}"
|
||||
echo "🔧 DEBUG: Large files: ${#LARGE_FILES[@]}"
|
||||
echo "🔧 DEBUG: Binary files: ${#BINARY_FILES[@]}"
|
||||
echo "🔧 DEBUG: Secret violations: ${#SECRET_VIOLATIONS[@]}"
|
||||
echo "🔧 DEBUG: Suspicious files: ${#SUSPICIOUS_FILES[@]}"
|
||||
echo "🔧 DEBUG: Research data violations: ${#RESEARCH_DATA_VIOLATIONS[@]}"
|
||||
echo "🔧 DEBUG: Flask secret violations: ${#FLASK_SECRET_VIOLATIONS[@]}"
|
||||
echo "🔧 DEBUG: Env file violations: ${#ENV_FILE_VIOLATIONS[@]}"
|
||||
echo "🔧 DEBUG: High entropy violations: ${#HIGH_ENTROPY_VIOLATIONS[@]}"
|
||||
echo "🔧 DEBUG: Hardcoded path violations: ${#HARDCODED_PATH_VIOLATIONS[@]}"
|
||||
echo "🔧 DEBUG: Hardcoded IP violations: ${#HARDCODED_IP_VIOLATIONS[@]}"
|
||||
echo "🔧 DEBUG: Suspicious filetype violations: ${#SUSPICIOUS_FILETYPE_VIOLATIONS[@]}"
|
||||
echo "📊 Security scan completed. Analyzing results..."
|
||||
echo "📋 Summary of findings:"
|
||||
echo " - File type violations: ${#WHITELIST_VIOLATIONS[@]}"
|
||||
echo " - Large files: ${#LARGE_FILES[@]}"
|
||||
echo " - Binary files: ${#BINARY_FILES[@]}"
|
||||
echo " - Potential secrets: ${#SECRET_VIOLATIONS[@]}"
|
||||
echo " - Suspicious filenames: ${#SUSPICIOUS_FILES[@]}"
|
||||
echo " - Research data leaks: ${#RESEARCH_DATA_VIOLATIONS[@]}"
|
||||
echo " - Hardcoded Flask secrets: ${#FLASK_SECRET_VIOLATIONS[@]}"
|
||||
echo " - Environment files: ${#ENV_FILE_VIOLATIONS[@]}"
|
||||
echo " - High-entropy strings: ${#HIGH_ENTROPY_VIOLATIONS[@]}"
|
||||
echo " - Hardcoded paths: ${#HARDCODED_PATH_VIOLATIONS[@]}"
|
||||
echo " - Hardcoded IPs: ${#HARDCODED_IP_VIOLATIONS[@]}"
|
||||
echo " - Suspicious file types: ${#SUSPICIOUS_FILETYPE_VIOLATIONS[@]}"
|
||||
|
||||
TOTAL_VIOLATIONS=0
|
||||
|
||||
|
||||
Reference in New Issue
Block a user