mirror of
https://github.com/LearningCircuit/local-deep-research.git
synced 2026-06-16 03:51:07 +03:00
Merge branch 'dev' into refactor/remove-dogpile-cache-add-stampede-protection
Resolve merge conflicts:
- pyproject.toml: Keep flask-limiter from dev, remove dogpile-cache/redis/msgpack as intended
- test_env_var_usage.py: Keep rate_limiter.py from dev, remove memory_cache/ as intended
This commit is contained in:
4
.github/scripts/check-file-writes.sh
vendored
4
.github/scripts/check-file-writes.sh
vendored
@@ -116,6 +116,7 @@ SEARCH_PATHS="src/"
|
||||
# Single pass to collect all matches
|
||||
for pattern in "${SUSPICIOUS_PATTERNS[@]}"; do
|
||||
# Use grep with binary files excluded and max line length to avoid issues with minified files
|
||||
# shellcheck disable=SC2086 # Word splitting is intentional for EXCLUDE_ARGS
|
||||
matches=$(grep -rn -I $EXCLUDE_ARGS -- "$pattern" $SEARCH_PATHS --include="*.py" --include="*.js" --include="*.ts" 2>/dev/null | head -1000 || true)
|
||||
if [ -n "$matches" ]; then
|
||||
ALL_MATCHES="$ALL_MATCHES$matches\n"
|
||||
@@ -123,16 +124,19 @@ for pattern in "${SUSPICIOUS_PATTERNS[@]}"; do
|
||||
done
|
||||
|
||||
# Also check for specific problematic patterns in one pass
|
||||
# shellcheck disable=SC2086 # Word splitting is intentional for EXCLUDE_ARGS
|
||||
temp_matches=$(grep -rn -I $EXCLUDE_ARGS -E "tmp_path|tempfile|/tmp/" $SEARCH_PATHS --include="*.py" 2>/dev/null | head -500 || true)
|
||||
if [ -n "$temp_matches" ]; then
|
||||
ALL_MATCHES="$ALL_MATCHES$temp_matches\n"
|
||||
fi
|
||||
|
||||
# shellcheck disable=SC2086 # Word splitting is intentional for EXCLUDE_ARGS
|
||||
db_matches=$(grep -rn -I $EXCLUDE_ARGS -E "report_content.*open|report_content.*write|markdown_content.*open|markdown_content.*write" $SEARCH_PATHS --include="*.py" 2>/dev/null | head -500 || true)
|
||||
if [ -n "$db_matches" ]; then
|
||||
ALL_MATCHES="$ALL_MATCHES$db_matches\n"
|
||||
fi
|
||||
|
||||
# shellcheck disable=SC2086 # Word splitting is intentional for EXCLUDE_ARGS
|
||||
export_matches=$(grep -rn -I $EXCLUDE_ARGS -E "export.*Path|export.*path\.open|export.*\.write" $SEARCH_PATHS --include="*.py" 2>/dev/null | head -500 || true)
|
||||
if [ -n "$export_matches" ]; then
|
||||
ALL_MATCHES="$ALL_MATCHES$export_matches\n"
|
||||
|
||||
24
.github/scripts/file-whitelist-check.sh
vendored
24
.github/scripts/file-whitelist-check.sh
vendored
@@ -56,13 +56,13 @@ if [ "$GITHUB_EVENT_NAME" = "pull_request" ]; then
|
||||
# For PRs: check all files that would be added/modified in the entire PR
|
||||
echo "🔍 Checking files in PR from $GITHUB_BASE_REF to HEAD..."
|
||||
|
||||
CHANGED_FILES=$(git diff --name-only --diff-filter=AM origin/$GITHUB_BASE_REF..HEAD)
|
||||
CHANGED_FILES=$(git diff --name-only --diff-filter=AM origin/"$GITHUB_BASE_REF"..HEAD)
|
||||
FILE_COUNT=$(echo "$CHANGED_FILES" | wc -l)
|
||||
echo "📋 Found $FILE_COUNT changed files with git diff"
|
||||
|
||||
# Also get newly added files across all commits in the PR
|
||||
# Use a more robust approach that handles edge cases
|
||||
ALL_NEW_FILES=$(git log --name-only --pretty=format: --diff-filter=A origin/$GITHUB_BASE_REF..HEAD 2>/dev/null | grep -v '^$' | sort | uniq || echo "")
|
||||
ALL_NEW_FILES=$(git log --name-only --pretty=format: --diff-filter=A origin/"$GITHUB_BASE_REF"..HEAD 2>/dev/null | grep -v '^$' | sort | uniq || echo "")
|
||||
NEW_FILE_COUNT=$(echo "$ALL_NEW_FILES" | wc -w)
|
||||
echo "📋 Found $NEW_FILE_COUNT newly added files with git log"
|
||||
|
||||
@@ -124,7 +124,7 @@ fi
|
||||
if [ -f "$file" ]; then
|
||||
FILE_SIZE=$(stat -c%s "$file" 2>/dev/null || echo 0)
|
||||
if [ "$FILE_SIZE" -gt 1048576 ]; then
|
||||
LARGE_FILES+=("$file ($(echo $FILE_SIZE | awk '{printf "%.1fMB", $1/1024/1024}'))")
|
||||
LARGE_FILES+=("$file ($(echo "$FILE_SIZE" | awk '{printf "%.1fMB", $1/1024/1024}'))")
|
||||
fi
|
||||
fi
|
||||
|
||||
@@ -344,7 +344,7 @@ FILE_TYPE=$(file -b "$violation" 2>/dev/null || echo "unknown")
|
||||
echo " → File extension: .$FILE_EXT"
|
||||
echo " → File type: $FILE_TYPE"
|
||||
echo " → First few lines:"
|
||||
head -3 "$violation" 2>/dev/null | while read line; do
|
||||
head -3 "$violation" 2>/dev/null | while read -r line; do
|
||||
echo " $line"
|
||||
done
|
||||
fi
|
||||
@@ -394,7 +394,7 @@ echo " 🔍 EXAMINING: $violation"
|
||||
|
||||
# Show the specific lines that triggered the detection
|
||||
echo " → Suspicious content found:"
|
||||
grep -n -iE "(api[_-]?key|secret|password|token|private[_-]?key)" "$violation" 2>/dev/null | head -5 | while read line; do
|
||||
grep -n -iE "(api[_-]?key|secret|password|token|private[_-]?key)" "$violation" 2>/dev/null | head -5 | while read -r line; do
|
||||
echo " $line"
|
||||
done
|
||||
|
||||
@@ -439,7 +439,7 @@ FILE_TYPE=$(file -b "$violation" 2>/dev/null || echo "unknown")
|
||||
FILE_SIZE=$(stat -c%s "$violation" 2>/dev/null || echo "unknown")
|
||||
echo " → File info: $FILE_TYPE (${FILE_SIZE} bytes)"
|
||||
echo " → Content preview:"
|
||||
head -3 "$violation" 2>/dev/null | while read line; do
|
||||
head -3 "$violation" 2>/dev/null | while read -r line; do
|
||||
echo " $line"
|
||||
done
|
||||
fi
|
||||
@@ -462,7 +462,7 @@ echo " 📊 $violation"
|
||||
|
||||
# Show the specific lines with research data
|
||||
echo " → Found hardcoded research data:"
|
||||
grep -n -E "(research_id|session_id|query_id).*=.*[\"'][0-9a-f]{8,}[\"']" "$violation" 2>/dev/null | head -3 | while read line; do
|
||||
grep -n -E "(research_id|session_id|query_id).*=.*[\"'][0-9a-f]{8,}[\"']" "$violation" 2>/dev/null | head -3 | while read -r line; do
|
||||
echo " $line"
|
||||
done
|
||||
|
||||
@@ -483,7 +483,7 @@ echo " 🔐 $violation"
|
||||
|
||||
# Show the specific lines with secret keys
|
||||
echo " → Found hardcoded Flask secret key:"
|
||||
grep -n -E "SECRET_KEY.*=.*[\"'][^\"']{16,}[\"']" "$violation" 2>/dev/null | head -3 | while read line; do
|
||||
grep -n -E "SECRET_KEY.*=.*[\"'][^\"']{16,}[\"']" "$violation" 2>/dev/null | head -3 | while read -r line; do
|
||||
echo " $line"
|
||||
done
|
||||
|
||||
@@ -518,7 +518,7 @@ echo " 🎲 $violation"
|
||||
|
||||
# Show sample of high entropy strings
|
||||
echo " → Found high-entropy strings:"
|
||||
grep -n -E "[a-zA-Z0-9+/]{40,}={0,2}|[a-f0-9]{40,}" "$violation" 2>/dev/null | head -3 | while read line; do
|
||||
grep -n -E "[a-zA-Z0-9+/]{40,}={0,2}|[a-f0-9]{40,}" "$violation" 2>/dev/null | head -3 | while read -r line; do
|
||||
# Truncate long lines for readability
|
||||
echo " ${line:0:120}..."
|
||||
done
|
||||
@@ -540,7 +540,7 @@ echo " 📁 $violation"
|
||||
|
||||
# Show the specific hardcoded paths
|
||||
echo " → Found hardcoded paths:"
|
||||
grep -n -E "(/home/[a-zA-Z0-9_-]+|/Users/[a-zA-Z0-9_-]+|C:\\\\Users\\\\[a-zA-Z0-9_-]+|/opt/|/var/|/etc/|/usr/local/)" "$violation" 2>/dev/null | head -5 | while read line; do
|
||||
grep -n -E "(/home/[a-zA-Z0-9_-]+|/Users/[a-zA-Z0-9_-]+|C:\\\\Users\\\\[a-zA-Z0-9_-]+|/opt/|/var/|/etc/|/usr/local/)" "$violation" 2>/dev/null | head -5 | while read -r line; do
|
||||
echo " $line"
|
||||
done
|
||||
|
||||
@@ -561,7 +561,7 @@ echo " 🌐 $violation"
|
||||
|
||||
# Show the specific IP addresses
|
||||
echo " → Found hardcoded IP addresses:"
|
||||
grep -n -E "\b([0-9]{1,3}\.){3}[0-9]{1,3}\b" "$violation" 2>/dev/null | grep -v -E "(127\.0\.0\.1|0\.0\.0\.0|localhost|192\.168\.|10\.|172\.(1[6-9]|2[0-9]|3[0-1])\.|255\.255\.255\.|192\.0\.2\.|198\.51\.100\.|203\.0\.113\.)" | head -5 | while read line; do
|
||||
grep -n -E "\b([0-9]{1,3}\.){3}[0-9]{1,3}\b" "$violation" 2>/dev/null | grep -v -E "(127\.0\.0\.1|0\.0\.0\.0|localhost|192\.168\.|10\.|172\.(1[6-9]|2[0-9]|3[0-1])\.|255\.255\.255\.|192\.0\.2\.|198\.51\.100\.|203\.0\.113\.)" | head -5 | while read -r line; do
|
||||
echo " $line"
|
||||
done
|
||||
|
||||
@@ -624,7 +624,7 @@ esac
|
||||
if [ -f "$FILE_PATH" ]; then
|
||||
FILE_SIZE=$(stat -c%s "$FILE_PATH" 2>/dev/null || echo "unknown")
|
||||
if [ "$FILE_SIZE" != "unknown" ]; then
|
||||
READABLE_SIZE=$(echo $FILE_SIZE | awk '{if($1>=1048576) printf "%.1fMB", $1/1048576; else if($1>=1024) printf "%.1fKB", $1/1024; else printf "%dB", $1}')
|
||||
READABLE_SIZE=$(echo "$FILE_SIZE" | awk '{if($1>=1048576) printf "%.1fMB", $1/1048576; else if($1>=1024) printf "%.1fKB", $1/1024; else printf "%dB", $1}')
|
||||
echo " → File size: $READABLE_SIZE"
|
||||
fi
|
||||
fi
|
||||
|
||||
Reference in New Issue
Block a user