ring/default/hooks/claude-md-reminder.sh
Fred Amaral 9cb5a72737
fix(codereview): align reviewer references and harden lib-commons/multi-tenant agents
Propagates the 10-reviewer peer list across agent frontmatter, Position/Critical prose, shared-patterns, skill dispatchers, gate validators, and docs — resolving drift left behind when multi-tenant-reviewer and lib-commons-reviewer were added to the pool. Also fixes broken shared-pattern paths in lib-commons-reviewer and adds substantive blocker criteria to multi-tenant-reviewer plus codebase-context severity heuristic (Lerian third-rail vs external recommendation) to lib-commons-reviewer.

X-Lerian-Ref: 0x1
2026-04-18 20:18:16 -03:00

317 lines
12 KiB
Bash
Executable file

#!/usr/bin/env bash
# shellcheck disable=SC2034 # Unused variables OK for exported config
# UserPromptSubmit hook to periodically re-inject instruction files
# Combats context drift in long-running sessions by re-surfacing project instructions
# Supports: CLAUDE.md, AGENTS.md, PROJECT_RULES.md (dedupes symlinks to avoid double-injection)
set -euo pipefail
# Configuration constants
# Re-inject every 5 prompts - balances context freshness with token overhead
# Lower values = more frequent reminders but higher token cost
# Higher values = less overhead but risk of context being forgotten
# Keep MIN_PROMPT_COOLDOWN below this value: the temporal trigger is evaluated first, so a
# cooldown >= THROTTLE_INTERVAL would in practice starve the context/byte triggers.
# NOTE(review): this comment used to say "every 3 prompts" while the value was 5 —
# confirm 5 is the intended interval.
readonly THROTTLE_INTERVAL=5
# Directory containing this script. Not referenced in the code visible here
# (hence the SC2034 suppression above).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"
# Project root to scan; falls back to the current directory when CLAUDE_PROJECT_DIR
# is unset or empty.
PROJECT_DIR="${CLAUDE_PROJECT_DIR:-.}"
# File types to discover
# PROJECT_RULES.md replaced RULES.md per Lerian standard (2026-04)
INSTRUCTION_FILES=("CLAUDE.md" "AGENTS.md" "PROJECT_RULES.md")
# Context window usage threshold (percentage). When harness reports context ≥ this value,
# force a refresh. Calibrated for 1M-token context windows: 30% ≈ 300k tokens consumed,
# which is already substantial drift territory. Lower = more aggressive refresh (safer,
# higher token cost). Raise if re-injection overhead becomes visible.
# NOTE(review): this comment used to cite "15% ≈ 150k tokens" — confirm 30 is intended.
readonly CTX_PCT_THRESHOLD=30
# Transcript byte delta threshold (fallback when context_window field unavailable).
# ~300KB of transcript growth ≈ 75k tokens at 4 chars/token estimate.
# NOTE(review): this comment used to cite "~200KB ≈ 50k tokens" — confirm 300000 is intended.
readonly BYTE_THRESHOLD=300000
# Cooldown: non-temporal triggers require at least this many prompts since last injection.
# Prevents tight loops — a single injection adds 70KB+, context_pct would re-trigger immediately.
readonly MIN_PROMPT_COOLDOWN=3
# Use session-specific state files (per-session, not persistent)
# CLAUDE_SESSION_ID should be provided by Claude Code, fallback to PPID for session isolation
SESSION_ID="${CLAUDE_SESSION_ID:-$PPID}"
# STATE_FILE holds the cumulative prompt count, BYTES_FILE the transcript size recorded at
# the last injection, LAST_INJECT_FILE the prompt number of the last injection.
STATE_FILE="/tmp/claude-instruction-reminder-${SESSION_ID}.state"
BYTES_FILE="/tmp/claude-instruction-reminder-${SESSION_ID}.bytes"
LAST_INJECT_FILE="/tmp/claude-instruction-reminder-${SESSION_ID}.lastinject"
# Read UserPromptSubmit event JSON from stdin. Skipped when stdin is a terminal so that
# running the hook by hand does not block waiting for input.
# Claude Code provides: session_id, transcript_path, cwd, user_prompt, hook_event_name,
# and (recently added) context_window with .used_percentage / .used_tokens / .total_tokens.
hook_input=""
if [ ! -t 0 ]; then
  hook_input=$(cat)
fi

#######################################
# Coerce a value extracted from the hook JSON to a non-negative integer.
# The harness may report used_percentage as a float (e.g. 42.7); `[ x -ge y ]` only
# accepts integers, so an uncoerced float would make the context-window trigger fail
# silently. The fractional part is truncated; anything that is not plain digits
# (empty, "null", negative, text) becomes 0.
# Arguments: $1 - raw value
# Outputs:   the integer on stdout
#######################################
coerce_to_int() {
  local raw="${1:-}"
  raw="${raw%%.*}"
  case "$raw" in
    '' | *[!0-9]*) printf '0\n' ;;
    *) printf '%s\n' "$raw" ;;
  esac
}

# Extract context metrics from hook event JSON.
# PRIMARY signal: context_window.used_percentage (direct from harness, authoritative).
# SECONDARY signal: transcript_path → byte delta (heuristic fallback for older harness versions).
ctx_pct=0
ctx_tokens=0
transcript_path=""
if [ -n "$hook_input" ] && command -v jq >/dev/null 2>&1; then
  # printf instead of echo: the payload is arbitrary data and must reach jq verbatim.
  # `|| echo 0` keeps the hook alive on malformed JSON (jq exits non-zero under pipefail).
  ctx_pct=$(printf '%s' "$hook_input" | jq -r '.context_window.used_percentage // 0' 2>/dev/null || echo 0)
  ctx_tokens=$(printf '%s' "$hook_input" | jq -r '.context_window.used_tokens // 0' 2>/dev/null || echo 0)
  transcript_path=$(printf '%s' "$hook_input" | jq -r '.transcript_path // empty' 2>/dev/null || echo "")
fi
# Normalise to integers so later numeric comparisons cannot error out.
ctx_pct=$(coerce_to_int "$ctx_pct")
ctx_tokens=$(coerce_to_int "$ctx_tokens")
#######################################
# Read a non-negative integer from a state file.
# State files live in /tmp and may be missing, empty, truncated or otherwise
# corrupted; feeding such content straight into $(( )) aborts the hook under
# `set -eu`. Anything that is not plain digits is treated as 0.
# Arguments: $1 - path of the state file
# Outputs:   the integer on stdout (base 10, leading zeros removed)
#######################################
read_counter_file() {
  local path="$1" value=""
  if [ -f "$path" ]; then
    # read returns non-zero when the file lacks a trailing newline; the value is still set.
    { IFS= read -r value < "$path"; } 2>/dev/null || true
  fi
  case "$value" in
    '' | *[!0-9]*) value=0 ;;
  esac
  # 10# forces decimal so a value such as "08" is not rejected as invalid octal.
  printf '%s\n' "$((10#$value))"
}

# Transcript byte measurement (fallback proxy when context_window unavailable).
current_bytes=0
if [ -n "$transcript_path" ] && [ -f "$transcript_path" ]; then
  # tr strips the padding some wc implementations put around the count.
  current_bytes=$(wc -c < "$transcript_path" 2>/dev/null | tr -d ' ' || echo 0)
  case "$current_bytes" in
    '' | *[!0-9]*) current_bytes=0 ;;
  esac
fi
# Transcript size recorded at the last injection (0 when never recorded).
last_bytes=$(read_counter_file "$BYTES_FILE")
delta_bytes=$((current_bytes - last_bytes))

# Cumulative prompt count (for display and temporal trigger).
PROMPT_COUNT=$(read_counter_file "$STATE_FILE")
PROMPT_COUNT=$((PROMPT_COUNT + 1))
echo "$PROMPT_COUNT" > "$STATE_FILE"

# Prompts since last injection (for cooldown enforcement on non-temporal triggers).
LAST_INJECT_PROMPT=$(read_counter_file "$LAST_INJECT_FILE")
prompts_since_inject=$((PROMPT_COUNT - LAST_INJECT_PROMPT))
# Trigger cascade — the first matching rule wins, in priority order:
#   1. Temporal floor: every THROTTLE_INTERVAL-th prompt, unconditionally.
#   2. Context-window saturation: harness-reported usage at or above CTX_PCT_THRESHOLD.
#   3. Volumetric fallback: transcript grew by more than BYTE_THRESHOLD bytes.
# Rules 2 and 3 are only considered once MIN_PROMPT_COOLDOWN prompts have elapsed since
# the last injection, so consecutive prompts cannot both re-inject.
should_inject=false
trigger_reason=""
if [ "$((PROMPT_COUNT % THROTTLE_INTERVAL))" -eq 0 ]; then
  should_inject=true
  trigger_reason="temporal (prompt ${PROMPT_COUNT})"
elif [ "$prompts_since_inject" -ge "$MIN_PROMPT_COOLDOWN" ]; then
  # stderr is discarded so that a non-integer ctx_pct simply fails this test quietly
  # and evaluation falls through to the byte-based rule.
  if [ "${ctx_pct:-0}" -ge "$CTX_PCT_THRESHOLD" ] 2>/dev/null; then
    should_inject=true
    trigger_reason="context-window (${ctx_pct}% used, ${ctx_tokens} tokens)"
  elif [ "$current_bytes" -gt 0 ] && [ "$delta_bytes" -gt "$BYTE_THRESHOLD" ]; then
    should_inject=true
    trigger_reason="volumetric (+${delta_bytes} bytes since last inject)"
  fi
fi

# Nothing fired: answer with a response that carries no additional context and stop.
case "$should_inject" in
  true) ;;
  *)
    printf '%s\n' \
      '{' \
      '"hookSpecificOutput": {' \
      '"hookEventName": "UserPromptSubmit"' \
      '}' \
      '}'
    exit 0
    ;;
esac

# Injecting — persist the prompt number (cooldown baseline) and, when a transcript size
# was measured, the size (delta baseline) for the next invocation.
printf '%s\n' "$PROMPT_COUNT" > "$LAST_INJECT_FILE"
if [ "$current_bytes" -gt 0 ]; then
  printf '%s\n' "$current_bytes" > "$BYTES_FILE"
fi
# Time to inject! Find all instruction files.

#######################################
# List regular files named <name> below <root>, skipping hidden directories and
# common dependency/build directories.
# Exclusions are applied with -prune to entries *below* the root only. Matching them
# against the full path (-not -path "*/build/*") would also match the root's own
# ancestors, so a project checked out under e.g. ~/build/ or ~/.local/ would never
# have its subtree scanned. Pruning also stops find from descending into node_modules
# and similar trees at all.
# Arguments: $1 - root directory, $2 - exact file name
# Outputs:   NUL-delimited paths on stdout; find's diagnostics are discarded
#######################################
find_in_project_tree() {
  local root="$1" name="$2"
  # -mindepth 1 keeps the -name tests from being applied to the root itself
  # (a root of "." would otherwise match '.*' and be pruned).
  find "$root" -mindepth 1 \
    \( -type d \( -name '.*' -o -name node_modules -o -name vendor \
    -o -name dist -o -name build \) -prune \) \
    -o \( -type f -name "$name" -print0 \) 2>/dev/null
}

# Array to store all instruction file paths (duplicates are removed later).
declare -a instruction_files=()
# For each file type, discover global, project root, and subdirectories
for file_name in "${INSTRUCTION_FILES[@]}"; do
  # 1. Global file (~/.claude/CLAUDE.md, ~/.claude/AGENTS.md, etc.)
  global_file="${HOME}/.claude/${file_name}"
  if [ -f "$global_file" ]; then
    instruction_files+=("$global_file")
  fi
  # 2. Project root file ([ -f ] follows symlinks, so a root-level symlink is included)
  if [ -f "${PROJECT_DIR}/${file_name}" ]; then
    instruction_files+=("${PROJECT_DIR}/${file_name}")
  fi
  # 3. All files in the project tree (regular files only; symlinks are not reported)
  while IFS= read -r -d '' file; do
    instruction_files+=("$file")
  done < <(find_in_project_tree "$PROJECT_DIR" "$file_name")
done
#######################################
# Resolve a path to its canonical form so symlinked duplicates can be detected.
# Resolution order: `realpath`, then python3's os.path.realpath, then the path as
# given. If the chosen resolver fails, the input path is echoed unchanged, so the
# hook keeps working (only without symlink dedup) on minimal systems.
# Arguments: $1 - path to resolve
# Outputs:   resolved (or original) path on stdout
#######################################
canonicalize_path() {
  local target="$1"

  if command -v realpath >/dev/null 2>&1; then
    realpath "$target" 2>/dev/null || echo "$target"
    return
  fi

  if command -v python3 >/dev/null 2>&1; then
    python3 -c "import os,sys; print(os.path.realpath(sys.argv[1]))" "$target" 2>/dev/null || echo "$target"
    return
  fi

  echo "$target"
}
#######################################
# JSON-encode a file's content for safe embedding in hook output.
# RFC 8259 mandates escaping all control characters U+0000..U+001F as \uXXXX when they
# lack short-form escapes (\n \r \t \b \f). Hand-rolled awk regex misses vertical tab,
# null bytes, ESC, etc. — CLAUDE.md files in the wild contain these (415 lines in Ring's
# project CLAUDE.md had unescaped control chars when audited). Cascade: jq → python3 → awk.
# The surrounding double quotes of the JSON string literal are stripped so the result
# can be spliced into a larger JSON string being built by the hook.
# Arguments: $1 - path of the file to encode
# Outputs:   the escaped content (without enclosing quotes) on stdout
#######################################
escape_for_json() {
  local f="$1"
  if command -v jq >/dev/null 2>&1; then
    # jq -Rs: raw input, slurped as a single string; `.` emits it as one JSON string
    # literal on one line, whose first and last character are the quotes removed by sed.
    jq -Rs '.' < "$f" | sed -e '1s/^"//' -e '$s/"$//'
  elif command -v python3 >/dev/null 2>&1; then
    # The embedded program must start at column 0 and the `with` body must be indented,
    # otherwise python raises IndentationError. newline="" disables newline translation
    # so CR/CRLF are encoded as-is, matching what the jq branch produces.
    python3 -c '
import sys, json
with open(sys.argv[1], "r", encoding="utf-8", errors="replace", newline="") as fh:
    # json.dumps wraps in quotes; strip them to match jq -Rs behavior
    sys.stdout.write(json.dumps(fh.read())[1:-1])
' "$f"
  else
    # Last-resort awk escape. Known to miss some control characters — see RFC 8259.
    # Backslashes are doubled first so the escapes added afterwards are not re-escaped.
    awk '
      BEGIN { ORS="" }
      {
        gsub(/\\/, "\\\\")
        gsub(/"/, "\\\"")
        gsub(/\t/, "\\t")
        gsub(/\r/, "\\r")
        gsub(/\f/, "\\f")
        if (NR > 1) printf "\\n"
        printf "%s", $0
      }
      END { printf "\\n" }
    ' "$f"
  fi
}
# Drop duplicate entries, comparing by canonical path. Two situations produce them:
#   1. The same file reached twice (explicit project-root check + find).
#   2. Symlinks (e.g., Ring convention: AGENTS.md -> CLAUDE.md) that resolve to a file
#      already listed — injecting both would repeat the content.
# The first occurrence is kept, so the INSTRUCTION_FILES priority order survives
# (CLAUDE.md is shown rather than AGENTS.md when both resolve to the same file).
if (( ${#instruction_files[@]} > 0 )); then
  unique_files=()
  seen_paths=""
  for candidate in "${instruction_files[@]}"; do
    canonical=$(canonicalize_path "$candidate")
    # Each remembered path is wrapped in newlines so that one path being a substring
    # of another cannot produce a false match.
    marker=$'\n'"${canonical}"$'\n'
    if [[ "$seen_paths" != *"$marker"* ]]; then
      unique_files+=("$candidate")
      seen_paths+="$marker"
    fi
    # else: already seen (likely a symlink target) — skipped silently
  done
  instruction_files=("${unique_files[@]}")
fi
#######################################
# Escape a short string (here: a display path) for use inside a JSON string literal.
# Paths may legally contain quotes, backslashes or control characters, any of which
# would corrupt the hook's JSON response if spliced in verbatim.
# Arguments: $1 - raw string
# Outputs:   escaped string on stdout (no trailing newline)
#######################################
json_escape_inline() {
  local s="${1:-}"
  s="${s//\\/\\\\}" # backslashes first, so escapes added below are not doubled
  s="${s//\"/\\\"}"
  s="${s//$'\n'/\\n}"
  s="${s//$'\r'/\\r}"
  s="${s//$'\t'/\\t}"
  printf '%s' "$s"
}

# Build reminder context. The text is assembled as the *inside* of a JSON string:
# "\n" below is a literal backslash-n that the JSON consumer decodes as a newline.
reminder="<instruction-files-reminder>\n"
reminder="${reminder}Re-reading instruction files to combat context drift — trigger: ${trigger_reason}\n\n"
# Guard the expansion: under `set -u`, bash older than 4.4 treats "${arr[@]}" of an
# empty array as an unbound variable and aborts when no instruction file was found.
if [ "${#instruction_files[@]}" -gt 0 ]; then
  for file in "${instruction_files[@]}"; do
    # Get relative path for display
    file_name="${file##*/}"
    if [[ "$file" == "${HOME}/.claude/"* ]]; then
      # Literal "~" on purpose: this is display text, not a path to be expanded.
      display_path="~/.claude/${file_name} (global)"
    else
      # Strip the project prefix. PROJECT_DIR is quoted so it is matched literally
      # rather than as a glob pattern.
      display_path="${file#"$PROJECT_DIR"/}"
      # Prefix did not match (nothing stripped): fall back to the bare file name.
      if [[ "$display_path" == "$file" ]]; then
        display_path="$file_name"
      fi
    fi
    # Choose emoji based on file type
    case "$file_name" in
      CLAUDE.md) emoji="📋" ;;
      AGENTS.md) emoji="🤖" ;;
      PROJECT_RULES.md) emoji="📜" ;;
      *) emoji="📄" ;;
    esac
    display_path=$(json_escape_inline "$display_path")
    reminder="${reminder}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n"
    reminder="${reminder}${emoji} ${display_path}\n"
    reminder="${reminder}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
    # JSON-encode file content (RFC 8259 compliant via jq/python3 cascade).
    escaped_content=$(escape_for_json "$file")
    reminder="${reminder}${escaped_content}\n\n"
  done
fi
reminder="${reminder}</instruction-files-reminder>\n"
# Static reminder sections appended after the instruction files. Each section is kept as
# a list of lines and joined with a literal backslash-n (the JSON newline escape), since
# the text ends up inside a JSON string. An empty element yields a blank line.

# Agent usage reminder (compact, ~200 tokens)
agent_reminder_lines=(
  '<agent-usage-reminder>'
  'CONTEXT CHECK: Before using Glob/Grep/Read chains, consider agents:'
  ''
  '| Task | Agent |'
  '|------|-------|'
  '| Explore codebase | Explore |'
  '| Multi-file search | Explore |'
  '| Complex research | general-purpose |'
  '| Code review | ALL 10 reviewers in PARALLEL (code, business-logic, security, test, nil-safety, consequences, dead-code, performance, multi-tenant, lib-commons) |'
  '| Implementation plan | ring:write-plan |'
  '| Deep architecture | ring:codebase-explorer |'
  ''
  '**3-File Rule:** If reading >3 files, use an agent instead. 15x more context-efficient.'
  '</agent-usage-reminder>'
)
# Format '%s\\n' emits each element followed by a backslash and the letter n.
printf -v agent_reminder '%s\\n' "${agent_reminder_lines[@]}"
reminder="${reminder}${agent_reminder}"

# Duplication prevention reminder
duplication_guard_lines=(
  '<duplication-prevention-guard>'
  '**BEFORE ADDING CONTENT** to any file:'
  "1. SEARCH FIRST: \`grep -r 'keyword' --include='*.md'\`"
  "2. If exists -> REFERENCE it, don't copy"
  '3. Canonical sources: CLAUDE.md (rules), docs/*.md (details)'
  '4. NEVER duplicate - always link to single source of truth'
  '</duplication-prevention-guard>'
)
printf -v duplication_guard '%s\\n' "${duplication_guard_lines[@]}"
reminder="${reminder}${duplication_guard}"
# Emit the hook response carrying the assembled reminder as additionalContext, then
# finish successfully. $reminder is already escaped for use inside a JSON string and is
# written verbatim (printf '%s' does not interpret its backslashes).
printf '%s\n' \
  '{' \
  '"hookSpecificOutput": {' \
  '"hookEventName": "UserPromptSubmit",' \
  "\"additionalContext\": \"${reminder}\"" \
  '}' \
  '}'
exit 0