Merge branch 'dev' into refactor/remove-dogpile-cache-add-stampede-protection

Resolve merge conflicts:
- pyproject.toml: Keep flask-limiter from dev; remove dogpile-cache/redis/msgpack as intended
- test_env_var_usage.py: Keep rate_limiter.py from dev; remove memory_cache/ as intended
2026-06-16 03:51:07 +03:00 · 2025-12-01 21:20:16 +01:00
parent 2b548529fd 82eb66085f
commit 9332256489
71 changed files with 3602 additions and 375 deletions
--- a/.github/scripts/check-file-writes.sh
+++ b/.github/scripts/check-file-writes.sh
@@ -116,6 +116,7 @@ SEARCH_PATHS="src/"
 # Single pass to collect all matches
 for pattern in "${SUSPICIOUS_PATTERNS[@]}"; do
  # Use grep with binary files excluded and max line length to avoid issues with minified files
+  # shellcheck disable=SC2086 # Word splitting is intentional for EXCLUDE_ARGS
  matches=$(grep -rn -I $EXCLUDE_ARGS -- "$pattern" $SEARCH_PATHS --include="*.py" --include="*.js" --include="*.ts" 2>/dev/null | head -1000 || true)
  if [ -n "$matches" ]; then
    ALL_MATCHES="$ALL_MATCHES$matches\n"
@@ -123,16 +124,19 @@ for pattern in "${SUSPICIOUS_PATTERNS[@]}"; do
 done

 # Also check for specific problematic patterns in one pass
+# shellcheck disable=SC2086 # Word splitting is intentional for EXCLUDE_ARGS
 temp_matches=$(grep -rn -I $EXCLUDE_ARGS -E "tmp_path|tempfile|/tmp/" $SEARCH_PATHS --include="*.py" 2>/dev/null | head -500 || true)
 if [ -n "$temp_matches" ]; then
  ALL_MATCHES="$ALL_MATCHES$temp_matches\n"
 fi

+# shellcheck disable=SC2086 # Word splitting is intentional for EXCLUDE_ARGS
 db_matches=$(grep -rn -I $EXCLUDE_ARGS -E "report_content.*open|report_content.*write|markdown_content.*open|markdown_content.*write" $SEARCH_PATHS --include="*.py" 2>/dev/null | head -500 || true)
 if [ -n "$db_matches" ]; then
  ALL_MATCHES="$ALL_MATCHES$db_matches\n"
 fi

+# shellcheck disable=SC2086 # Word splitting is intentional for EXCLUDE_ARGS
 export_matches=$(grep -rn -I $EXCLUDE_ARGS -E "export.*Path|export.*path\.open|export.*\.write" $SEARCH_PATHS --include="*.py" 2>/dev/null | head -500 || true)
 if [ -n "$export_matches" ]; then
  ALL_MATCHES="$ALL_MATCHES$export_matches\n"
--- a/.github/scripts/file-whitelist-check.sh
+++ b/.github/scripts/file-whitelist-check.sh
@@ -56,13 +56,13 @@ if [ "$GITHUB_EVENT_NAME" = "pull_request" ]; then
 # For PRs: check all files that would be added/modified in the entire PR
 echo "🔍 Checking files in PR from $GITHUB_BASE_REF to HEAD..."

-CHANGED_FILES=$(git diff --name-only --diff-filter=AM origin/$GITHUB_BASE_REF..HEAD)
+CHANGED_FILES=$(git diff --name-only --diff-filter=AM origin/"$GITHUB_BASE_REF"..HEAD)
 FILE_COUNT=$(echo "$CHANGED_FILES" | wc -l)
 echo "📋 Found $FILE_COUNT changed files with git diff"

 # Also get newly added files across all commits in the PR
 # Use a more robust approach that handles edge cases
-ALL_NEW_FILES=$(git log --name-only --pretty=format: --diff-filter=A origin/$GITHUB_BASE_REF..HEAD 2>/dev/null | grep -v '^$' | sort | uniq || echo "")
+ALL_NEW_FILES=$(git log --name-only --pretty=format: --diff-filter=A origin/"$GITHUB_BASE_REF"..HEAD 2>/dev/null | grep -v '^$' | sort | uniq || echo "")
 NEW_FILE_COUNT=$(echo "$ALL_NEW_FILES" | wc -w)
 echo "📋 Found $NEW_FILE_COUNT newly added files with git log"

@@ -124,7 +124,7 @@ fi
 if [ -f "$file" ]; then
 FILE_SIZE=$(stat -c%s "$file" 2>/dev/null || echo 0)
 if [ "$FILE_SIZE" -gt 1048576 ]; then
-LARGE_FILES+=("$file ($(echo $FILE_SIZE | awk '{printf "%.1fMB", $1/1024/1024}'))")
+LARGE_FILES+=("$file ($(echo "$FILE_SIZE" | awk '{printf "%.1fMB", $1/1024/1024}'))")
 fi
 fi

@@ -344,7 +344,7 @@ FILE_TYPE=$(file -b "$violation" 2>/dev/null || echo "unknown")
 echo "     → File extension: .$FILE_EXT"
 echo "     → File type: $FILE_TYPE"
 echo "     → First few lines:"
-head -3 "$violation" 2>/dev/null | while read line; do
+head -3 "$violation" 2>/dev/null | while read -r line; do
 echo "       $line"
 done
 fi
@@ -394,7 +394,7 @@ echo "  🔍 EXAMINING: $violation"

 # Show the specific lines that triggered the detection
 echo "     → Suspicious content found:"
-grep -n -iE "(api[_-]?key|secret|password|token|private[_-]?key)" "$violation" 2>/dev/null | head -5 | while read line; do
+grep -n -iE "(api[_-]?key|secret|password|token|private[_-]?key)" "$violation" 2>/dev/null | head -5 | while read -r line; do
 echo "       $line"
 done

@@ -439,7 +439,7 @@ FILE_TYPE=$(file -b "$violation" 2>/dev/null || echo "unknown")
 FILE_SIZE=$(stat -c%s "$violation" 2>/dev/null || echo "unknown")
 echo "     → File info: $FILE_TYPE (${FILE_SIZE} bytes)"
 echo "     → Content preview:"
-head -3 "$violation" 2>/dev/null | while read line; do
+head -3 "$violation" 2>/dev/null | while read -r line; do
 echo "       $line"
 done
 fi
@@ -462,7 +462,7 @@ echo "  📊 $violation"

 # Show the specific lines with research data
 echo "     → Found hardcoded research data:"
-grep -n -E "(research_id|session_id|query_id).*=.*[\"'][0-9a-f]{8,}[\"']" "$violation" 2>/dev/null | head -3 | while read line; do
+grep -n -E "(research_id|session_id|query_id).*=.*[\"'][0-9a-f]{8,}[\"']" "$violation" 2>/dev/null | head -3 | while read -r line; do
 echo "       $line"
 done

@@ -483,7 +483,7 @@ echo "  🔐 $violation"

 # Show the specific lines with secret keys
 echo "     → Found hardcoded Flask secret key:"
-grep -n -E "SECRET_KEY.*=.*[\"'][^\"']{16,}[\"']" "$violation" 2>/dev/null | head -3 | while read line; do
+grep -n -E "SECRET_KEY.*=.*[\"'][^\"']{16,}[\"']" "$violation" 2>/dev/null | head -3 | while read -r line; do
 echo "       $line"
 done

@@ -518,7 +518,7 @@ echo "  🎲 $violation"

 # Show sample of high entropy strings
 echo "     → Found high-entropy strings:"
-grep -n -E "[a-zA-Z0-9+/]{40,}={0,2}|[a-f0-9]{40,}" "$violation" 2>/dev/null | head -3 | while read line; do
+grep -n -E "[a-zA-Z0-9+/]{40,}={0,2}|[a-f0-9]{40,}" "$violation" 2>/dev/null | head -3 | while read -r line; do
 # Truncate long lines for readability
 echo "       ${line:0:120}..."
 done
@@ -540,7 +540,7 @@ echo "  📁 $violation"

 # Show the specific hardcoded paths
 echo "     → Found hardcoded paths:"
-grep -n -E "(/home/[a-zA-Z0-9_-]+|/Users/[a-zA-Z0-9_-]+|C:\\\\Users\\\\[a-zA-Z0-9_-]+|/opt/|/var/|/etc/|/usr/local/)" "$violation" 2>/dev/null | head -5 | while read line; do
+grep -n -E "(/home/[a-zA-Z0-9_-]+|/Users/[a-zA-Z0-9_-]+|C:\\\\Users\\\\[a-zA-Z0-9_-]+|/opt/|/var/|/etc/|/usr/local/)" "$violation" 2>/dev/null | head -5 | while read -r line; do
 echo "       $line"
 done

@@ -561,7 +561,7 @@ echo "  🌐 $violation"

 # Show the specific IP addresses
 echo "     → Found hardcoded IP addresses:"
-grep -n -E "\b([0-9]{1,3}\.){3}[0-9]{1,3}\b" "$violation" 2>/dev/null | grep -v -E "(127\.0\.0\.1|0\.0\.0\.0|localhost|192\.168\.|10\.|172\.(1[6-9]|2[0-9]|3[0-1])\.|255\.255\.255\.|192\.0\.2\.|198\.51\.100\.|203\.0\.113\.)" | head -5 | while read line; do
+grep -n -E "\b([0-9]{1,3}\.){3}[0-9]{1,3}\b" "$violation" 2>/dev/null | grep -v -E "(127\.0\.0\.1|0\.0\.0\.0|localhost|192\.168\.|10\.|172\.(1[6-9]|2[0-9]|3[0-1])\.|255\.255\.255\.|192\.0\.2\.|198\.51\.100\.|203\.0\.113\.)" | head -5 | while read -r line; do
 echo "       $line"
 done

@@ -624,7 +624,7 @@ esac
 if [ -f "$FILE_PATH" ]; then
 FILE_SIZE=$(stat -c%s "$FILE_PATH" 2>/dev/null || echo "unknown")
 if [ "$FILE_SIZE" != "unknown" ]; then
-READABLE_SIZE=$(echo $FILE_SIZE | awk '{if($1>=1048576) printf "%.1fMB", $1/1048576; else if($1>=1024) printf "%.1fKB", $1/1024; else printf "%dB", $1}')
+READABLE_SIZE=$(echo "$FILE_SIZE" | awk '{if($1>=1048576) printf "%.1fMB", $1/1048576; else if($1>=1024) printf "%.1fKB", $1/1024; else printf "%dB", $1}')
 echo "     → File size: $READABLE_SIZE"
 fi
 fi