OpenMetadata/bin/distributed-test/scripts/benchmark-sizing.sh

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

715 lines
28 KiB
Bash
Raw Permalink Normal View History

#!/usr/bin/env bash
# Strict mode: stop on a failing command, on use of an unset variable, and on
# a failure in any stage of a pipeline.
set -euo pipefail

# Absolute directory containing this script; perf-test.sh is looked up there.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PERF_TEST="${SCRIPT_DIR}/perf-test.sh"

# ─── Defaults ───────────────────────────────────────────────────────────────────
# Every value below is the starting point for the matching command-line flag.
SERVER='http://localhost:8585'   # --server
WORKERS=30                       # --workers
OUTPUT_DIR='./sizing-results'    # --output-dir
START_SCALE='10k'                # --start-scale
END_SCALE='xlarge'               # --end-scale
MODES='seq,realistic'            # --modes (comma-separated)
ADMIN_PORT=''                    # --admin-port (empty: not forwarded)
TOKEN=''                         # --token (empty: not forwarded)
RAMP=false                       # --ramp
SKIP_EXISTING=false              # --skip-existing
MIXED=false                      # --mixed
MIXED_DURATION=60                # --mixed-duration (seconds)
NO_BREAK=false                   # --no-break

# ─── Scale Ladder (ordered) ────────────────────────────────────────────────────
# Order matters: the requested start/end range is resolved by walking this list.
ALL_SCALES=(
  10k
  50k
  100k
  200k
  500k
  large
  xlarge
)

# ─── Colors ─────────────────────────────────────────────────────────────────────
# Stored as literal backslash sequences; they are interpreted later by
# `echo -e` or when embedded in a printf format string.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
BOLD='\033[1m'
NC='\033[0m'
# ─── Usage ──────────────────────────────────────────────────────────────────────
#######################################
# Print the command-line help text to stdout and terminate the script.
# Arguments: $1 - optional exit status (default 0), so error paths can show
#                 the help text and still exit with a failure status
# Outputs:   the help text on stdout
# Returns:   never returns; exits with the requested status
#######################################
usage() {
  cat <<'EOF'
Usage: benchmark-sizing.sh [OPTIONS]
Progressive benchmark runner that loops through scale tiers, runs perf-test.sh
at each tier, captures JSON outputs, and generates a final comparison report.
Scale Ladder: 10k → 50k → 100k → 200k → 500k → large (~2M) → xlarge (~5M)
OPTIONS:
--server URL Target server (default: http://localhost:8585)
--workers NUM Worker count for all runs (default: 30)
--output-dir DIR Directory for JSON reports + final summary (default: ./sizing-results)
--start-scale PRESET First scale tier to run (default: 10k)
--end-scale PRESET Last scale tier to run (default: xlarge)
--modes MODES Comma-separated: seq, realistic, or both (default: seq,realistic)
--admin-port PORT Pass through to perf-test.sh for server diagnostics
--token TOKEN Auth token pass-through
--ramp Run ramp test at first tier to find optimal workers
--skip-existing Skip tiers that already have JSON output in output-dir
--mixed Also run mixed read/write workload at each tier
--mixed-duration SECS Duration for mixed workload (default: 60)
--no-break Don't stop at break-points, run all tiers regardless
-h, --help Show this help message
EXAMPLES:
# Quick 2-tier test
benchmark-sizing.sh --start-scale 10k --end-scale 50k
# Full progressive benchmark with ramp and mixed workloads
benchmark-sizing.sh --ramp --mixed --output-dir /data/sizing
# Resume after failure (skips completed tiers)
benchmark-sizing.sh --skip-existing --output-dir ./sizing-results
# Single mode only
benchmark-sizing.sh --modes realistic --end-scale 500k
EOF
  exit "${1:-0}"
}
# ─── Parse Arguments ────────────────────────────────────────────────────────────
#######################################
# Abort with a clear message when an option that takes a value is the last
# word on the command line.
# Arguments: the remaining command-line words, option name first
# Outputs:   error message on stderr
# Returns:   0 when a value follows the option; exits 2 otherwise
#######################################
_require_value() {
  if [[ $# -lt 2 ]]; then
    echo "Error: option $1 requires a value" >&2
    exit 2
  fi
}

#######################################
# Apply command-line options on top of the default settings.
# Globals:   SERVER, WORKERS, OUTPUT_DIR, START_SCALE, END_SCALE, MODES,
#            ADMIN_PORT, TOKEN, RAMP, SKIP_EXISTING, MIXED, MIXED_DURATION,
#            NO_BREAK (written)
# Arguments: the script's command-line arguments
# Outputs:   diagnostics on stderr for unknown options or missing values
# Returns:   0 on success; exits 2 on a usage error; -h/--help calls usage,
#            which terminates the script
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --server)         _require_value "$@"; SERVER="$2"; shift 2 ;;
      --workers)        _require_value "$@"; WORKERS="$2"; shift 2 ;;
      --output-dir)     _require_value "$@"; OUTPUT_DIR="$2"; shift 2 ;;
      --start-scale)    _require_value "$@"; START_SCALE="$2"; shift 2 ;;
      --end-scale)      _require_value "$@"; END_SCALE="$2"; shift 2 ;;
      --modes)          _require_value "$@"; MODES="$2"; shift 2 ;;
      --admin-port)     _require_value "$@"; ADMIN_PORT="$2"; shift 2 ;;
      --token)          _require_value "$@"; TOKEN="$2"; shift 2 ;;
      --mixed-duration) _require_value "$@"; MIXED_DURATION="$2"; shift 2 ;;
      --ramp)           RAMP=true; shift ;;
      --skip-existing)  SKIP_EXISTING=true; shift ;;
      --mixed)          MIXED=true; shift ;;
      --no-break)       NO_BREAK=true; shift ;;
      -h|--help)        usage ;;
      *)
        echo "Unknown option: $1" >&2
        # usage terminates the shell it runs in; a subshell lets the help
        # text be shown on stderr while this error path still exits non-zero.
        (usage) >&2 || true
        exit 2
        ;;
    esac
  done
}

parse_args "$@"
# ─── Validate ───────────────────────────────────────────────────────────────────
# perf-test.sh is invoked directly for every run, so it must both exist and be
# executable. Fail fast here, with the diagnostic on stderr, instead of
# discovering the problem at the first tier.
if [[ ! -f "$PERF_TEST" ]]; then
  echo -e "${RED}Error: perf-test.sh not found at $PERF_TEST${NC}" >&2
  exit 1
fi
if [[ ! -x "$PERF_TEST" ]]; then
  echo -e "${RED}Error: perf-test.sh at $PERF_TEST is not executable${NC}" >&2
  exit 1
fi
#######################################
# Ensure a scale preset name is one of the known ladder tiers.
# Globals:   ALL_SCALES, RED, NC (read)
# Arguments: $1 - scale preset name to check
# Outputs:   error message on stderr when the name is unknown
# Returns:   0 when valid; exits the script with status 1 otherwise
#######################################
validate_scale() {
  local scale="$1"
  local candidate  # kept local so the caller's variables are not overwritten
  for candidate in "${ALL_SCALES[@]}"; do
    if [[ "$candidate" == "$scale" ]]; then
      return 0
    fi
  done
  echo -e "${RED}Error: Invalid scale '$scale'. Valid: ${ALL_SCALES[*]}${NC}" >&2
  exit 1
}
# Check both ends of the requested range up front; validate_scale exits the
# script with status 1 if either name is not in ALL_SCALES.
validate_scale "$START_SCALE"
validate_scale "$END_SCALE"
# ─── Resolve Scale Range ───────────────────────────────────────────────────────
#######################################
# Fill SCALES with the ladder tiers from <start> through <end>, inclusive.
# The ladder is walked once in order, so an <end> that precedes <start>
# yields an empty range and is rejected.
# Globals:   ALL_SCALES, RED, NC (read); SCALES (written)
# Arguments: $1 - first tier to include
#            $2 - last tier to include
# Outputs:   error message on stderr when the range is empty
# Returns:   0 on success; exits the script with status 1 on an empty range
#######################################
resolve_scales() {
  local start="$1" end="$2"
  local tier  # kept local so the caller's variables are not overwritten
  local collecting=0
  SCALES=()
  for tier in "${ALL_SCALES[@]}"; do
    if [[ "$tier" == "$start" ]]; then
      collecting=1
    fi
    if (( collecting )); then
      SCALES+=("$tier")
    fi
    # Stop at <end> even when collection never started (end before start).
    if [[ "$tier" == "$end" ]]; then
      break
    fi
  done
  if [[ ${#SCALES[@]} -eq 0 ]]; then
    echo -e "${RED}Error: No scales resolved between $start and $end${NC}" >&2
    exit 1
  fi
}
# Turn the validated start/end presets into the ordered SCALES array.
resolve_scales "$START_SCALE" "$END_SCALE"
# ─── Parse Modes ────────────────────────────────────────────────────────────────
# Split the comma-separated --modes value into MODE_LIST and validate it.
IFS=',' read -ra MODE_LIST <<< "$MODES"
# An empty --modes value yields an empty list, which would otherwise pass the
# per-mode validation below and produce a benchmark that runs nothing.
if [[ ${#MODE_LIST[@]} -eq 0 ]]; then
  echo -e "${RED}Error: No modes given. Valid: seq, realistic${NC}" >&2
  exit 1
fi
for m in "${MODE_LIST[@]}"; do
  if [[ "$m" != "seq" && "$m" != "realistic" ]]; then
    echo -e "${RED}Error: Invalid mode '$m'. Valid: seq, realistic${NC}" >&2
    exit 1
  fi
done
# ─── Setup Output Directory ────────────────────────────────────────────────────
# Make sure the results directory exists, then derive the fixed file names
# that live inside it.
mkdir -p "$OUTPUT_DIR"
SUMMARY_CSV="${OUTPUT_DIR}/.sizing-progress.csv"
SUMMARY_MD="${OUTPUT_DIR}/SIZING-SUMMARY.md"
LOG_FILE="${OUTPUT_DIR}/benchmark-sizing.log"

#######################################
# Timestamp a message, append it verbatim to the log file, and echo it to
# stdout with backslash escapes (colour codes) interpreted.
# Globals:   LOG_FILE (read)
# Arguments: the message words; joined into a single line
# Outputs:   one line appended to $LOG_FILE and one line on stdout
#######################################
log() {
  local entry
  printf -v entry '[%s] %s' "$(date '+%Y-%m-%d %H:%M:%S')" "$*"
  printf '%s\n' "$entry" >> "$LOG_FILE"
  echo -e "$entry"
}
# ─── Print Banner ──────────────────────────────────────────────────────────────
# Start-up banner: a boxed title followed by one row per effective setting.
BANNER_RULE="══════════════════════════════════════════════════════════════════"

# Print one "label value" row with the value highlighted in cyan.
banner_row() {
  echo -e " $1 ${CYAN}$2${NC}"
}

echo ""
echo -e "${BOLD}${BANNER_RULE}${NC}"
echo -e "${BOLD} OpenMetadata Progressive Cluster Sizing Benchmark${NC}"
echo -e "${BOLD}${BANNER_RULE}${NC}"
echo ""
banner_row "Server:" "$SERVER"
banner_row "Workers:" "$WORKERS"
banner_row "Scale range:" "${SCALES[*]}"
banner_row "Modes:" "${MODE_LIST[*]}"
banner_row "Output:" "$OUTPUT_DIR"
banner_row "Ramp test:" "$RAMP"
banner_row "Mixed:" "$MIXED"
banner_row "No-break:" "$NO_BREAK"
# The admin port row is shown only when the option was supplied.
if [[ -n "$ADMIN_PORT" ]]; then
  banner_row "Admin port:" "$ADMIN_PORT"
fi
echo ""
# ─── Connectivity Check ────────────────────────────────────────────────────────
# Probe the version endpoint once before starting any benchmark run.
log "Checking server connectivity..."
# With -w "%{http_code}" curl itself prints 000 when no HTTP response was
# received, and also exits non-zero. Appending another "000" on failure would
# produce "000000" and defeat the unreachable-server test below, so only the
# exit status is neutralised here and an empty capture is defaulted instead.
HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 "$SERVER/api/v1/system/version" 2>/dev/null) || true
HTTP_CODE="${HTTP_CODE:-000}"
if [[ "$HTTP_CODE" == "000" ]]; then
  echo -e "${RED}Error: Cannot reach $SERVER (connection refused or timeout)${NC}" >&2
  exit 1
elif [[ "$HTTP_CODE" -ge 400 ]]; then
  # The server answered, so the run continues; the status is only flagged.
  echo -e "${YELLOW}Warning: Server returned HTTP $HTTP_CODE (may need auth token)${NC}"
fi
log "Server reachable (HTTP $HTTP_CODE)"
# ─── Build perf-test.sh Base Args ──────────────────────────────────────────────
#######################################
# Assemble the perf-test.sh options for one run and print them on a single
# line, separated by spaces.
# Globals:   SERVER, WORKERS, ADMIN_PORT, TOKEN, MIXED, MIXED_DURATION (read)
# Arguments: $1 - scale preset
#            $2 - mode; "realistic" adds --realistic
#            $3 - path of the JSON report to write
# Outputs:   the space-joined option list on stdout (argument boundaries are
#            not preserved for values that themselves contain whitespace)
#######################################
build_perf_args() {
  local scale="$1" mode="$2" output_file="$3"
  local -a argv=(
    --server "$SERVER"
    --workers "$WORKERS"
    --scale "$scale"
    --output "$output_file"
  )
  # Optional pass-through settings are added only when they were supplied.
  if [[ -n "$ADMIN_PORT" ]]; then
    argv+=(--admin-port "$ADMIN_PORT")
  fi
  if [[ -n "$TOKEN" ]]; then
    argv+=(--token "$TOKEN")
  fi
  if [[ "$mode" == "realistic" ]]; then
    argv+=(--realistic)
  fi
  if $MIXED; then
    argv+=(--mixed --mixed-duration "$MIXED_DURATION")
  fi
  echo "${argv[@]}"
}
# ─── Extract Metrics from JSON ──────────────────────────────────────────────────
#######################################
# Summarise one perf-test JSON report as a single CSV record:
#   total,rps,max_p95,max_p99,error_rate,wall_clock,assessment
# The p95/p99 values are the maxima over all entries of "entities" whose name
# does not start with "read_" or "mixed_".
# Arguments: $1 - path to the JSON report
# Outputs:   the CSV record on stdout; "N/A,0,0,0,0,0,N/A" when the file does
#            not exist; "N/A,0,0,0,0,0,error" when it cannot be parsed
#######################################
extract_metrics() {
  local json_file="$1"
  if [[ ! -f "$json_file" ]]; then
    echo "N/A,0,0,0,0,0,N/A"
    return
  fi
  # The path is handed over as an argument rather than pasted into the Python
  # source, so paths containing quotes or backslashes cannot break the snippet.
  python3 -c '
import json, sys

with open(sys.argv[1]) as f:
    data = json.load(f)

overall = data.get("overall", {})
sizing = data.get("cluster_sizing", {})
total = overall.get("total_entities_created", 0)
rps = overall.get("overall_throughput_rps", 0)
error_rate = overall.get("overall_error_rate_pct", 0)
wall_clock = overall.get("total_wall_clock_s", 0)
assessment = sizing.get("assessment", "unknown")

# Find max p95 and p99 across write entities
max_p95 = 0
max_p99 = 0
for name, ent in data.get("entities", {}).items():
    if name.startswith("read_") or name.startswith("mixed_"):
        continue
    lat = ent.get("latency_ms", {})
    p95 = lat.get("p95", 0)
    p99 = lat.get("p99", 0)
    if p95 > max_p95:
        max_p95 = p95
    if p99 > max_p99:
        max_p99 = p99

print(f"{total},{rps:.1f},{max_p95:.0f},{max_p99:.0f},{error_rate:.2f},{wall_clock:.0f},{assessment}")
' "$json_file" 2>/dev/null || echo "N/A,0,0,0,0,0,error"
}
# ─── Extract Config Summary from JSON ───────────────────────────────────────────
#######################################
# Print the cluster_sizing.config_summary entries of a perf-test JSON report,
# joined with "; ".
# Arguments: $1 - path to the JSON report
# Outputs:   the joined summary on stdout, or "N/A" when the file is missing,
#            cannot be parsed, or has no config_summary entries
#######################################
extract_config() {
  local json_file="$1"
  if [[ ! -f "$json_file" ]]; then
    echo "N/A"
    return
  fi
  # The path is passed as an argument instead of being interpolated into the
  # Python source, so quotes in the path cannot break the snippet.
  python3 -c '
import json, sys

with open(sys.argv[1]) as f:
    data = json.load(f)
cs = data.get("cluster_sizing", {}).get("config_summary", [])
print("; ".join(cs) if cs else "N/A")
' "$json_file" 2>/dev/null || echo "N/A"
}
# ─── Extract Server Info ────────────────────────────────────────────────────────
#######################################
# Print server details read from a perf-test JSON report, one per line:
# version (server_info.version), heap size in GB (from
# diagnostics_before.jvm.heap_max_bytes), Jetty thread maximum and database
# pool maximum.
# Arguments: $1 - path to the JSON report
# Outputs:   four "Key: value" lines on stdout, or the single line
#            "Version: unknown" when the report cannot be read or parsed
#######################################
extract_server_info() {
  local json_file="$1"
  # The path is passed as an argument instead of being interpolated into the
  # Python source, so quotes in the path cannot break the snippet.
  python3 -c '
import json, sys

with open(sys.argv[1]) as f:
    data = json.load(f)
si = data.get("server_info", {})
version = si.get("version", "unknown")
diag = data.get("diagnostics_before", {})
jvm = diag.get("jvm", {})
heap_max = jvm.get("heap_max_bytes", 0)
heap_gb = heap_max / (1024**3) if heap_max else 0
jetty = diag.get("jetty", {})
max_threads = jetty.get("threads_max", "unknown")
db = diag.get("database", {})
db_pool = db.get("pool_max", "unknown")
print(f"Version: {version}")
print(f"Heap: {heap_gb:.1f}GB")
print(f"Threads: {max_threads}")
print(f"DB Pool: {db_pool}")
' "$json_file" 2>/dev/null || echo "Version: unknown"
}
# ─── Break-point Detection ──────────────────────────────────────────────────────
# RPS divided by entity count for the most recent completed run; empty until
# a run has produced usable numbers.
PREV_RPS_PER_ENTITY=""

#######################################
# Decide whether a metrics record represents a break-point. A record is
# "broken" when any of the following holds:
#   - the assessment is "undersized"
#   - the error rate is above 10
#   - the p95 latency is above 10000
#   - rps/total has fallen below half of PREV_RPS_PER_ENTITY (checked only
#     when a previous value exists and total is neither "0" nor "N/A")
# A field that is not numeric simply disables the checks that need it.
# NOTE(review): rps/total shrinks as total grows even at constant rps, so the
# last rule may fire on any tier step of 2x or more — confirm this is intended.
# Globals:   PREV_RPS_PER_ENTITY (read)
# Arguments: $1 - CSV record: total,rps,p95,p99,error_rate,wall_clock,assessment
# Returns:   0 when broken, non-zero otherwise
#######################################
is_broken() {
  local metrics="$1"
  # All read targets are local so the caller's variables are not overwritten.
  local total rps p95 _p99 error_rate _wall_clock assessment
  IFS=',' read -r total rps p95 _p99 error_rate _wall_clock assessment <<< "$metrics"

  if [[ "$assessment" == "undersized" ]]; then
    return 0
  fi

  # An empty baseline switches the throughput comparison off.
  local baseline="$PREV_RPS_PER_ENTITY"
  if [[ "$total" == "0" || "$total" == "N/A" ]]; then
    baseline=""
  fi

  # One interpreter start covers all numeric checks; the values travel as
  # arguments, never as Python source text.
  python3 -c '
import sys

def num(text):
    try:
        return float(text)
    except ValueError:
        return None

err, p95, rps, total, prev = (num(a) for a in sys.argv[1:6])
broken = (err is not None and err > 10) or (p95 is not None and p95 > 10000)
if not broken and rps is not None and total is not None and prev is not None:
    broken = prev > 0 and (rps / max(total, 1)) / prev < 0.5
sys.exit(0 if broken else 1)
' "$error_rate" "$p95" "$rps" "$total" "$baseline" 2>/dev/null
}
# ─── Results Tracking ──────────────────────────────────────────────────────────
# One CSV row per completed (or failed) run, in execution order.
declare -a RESULT_LINES=()
# Description of the first break-point encountered; all empty when none.
BREAK_SCALE=""
BREAK_MODE=""
BREAK_REASON=""

#######################################
# Append one run to RESULT_LINES as "scale,mode,<metrics>,<config>".
# Globals:   RESULT_LINES (appended)
# Arguments: $1 - scale preset
#            $2 - mode
#            $3 - metrics CSV record
#            $4 - configuration summary text
#######################################
add_result() {
  local scale="$1" mode="$2" metrics="$3" config="$4"
  local row
  printf -v row '%s,%s,%s,%s' "$scale" "$mode" "$metrics" "$config"
  RESULT_LINES[${#RESULT_LINES[@]}]="$row"
}
# ─── Optional Ramp Test ────────────────────────────────────────────────────────
# With --ramp, run perf-test.sh once in ramp mode at the first selected tier
# and report the worker count it suggests. The suggestion is only printed;
# WORKERS itself is left unchanged.
if $RAMP; then
  FIRST_SCALE="${SCALES[0]}"
  log "${BOLD}Running ramp test at scale $FIRST_SCALE to find optimal workers...${NC}"
  RAMP_OUTPUT="$OUTPUT_DIR/sizing-${FIRST_SCALE}-ramp.json"
  RAMP_ARGS=(--server "$SERVER" --workers "$WORKERS" --scale "$FIRST_SCALE" --ramp --output "$RAMP_OUTPUT")
  if [[ -n "$ADMIN_PORT" ]]; then
    RAMP_ARGS+=(--admin-port "$ADMIN_PORT")
  fi
  if $SKIP_EXISTING && [[ -f "$RAMP_OUTPUT" ]]; then
    log "Ramp output exists, skipping (--skip-existing)"
  else
    # Log the command before the token is appended, with a masked placeholder,
    # so the credential is not written to the console or the log file.
    log "Running: perf-test.sh ${RAMP_ARGS[*]}${TOKEN:+ --token ***}"
    if [[ -n "$TOKEN" ]]; then
      RAMP_ARGS+=(--token "$TOKEN")
    fi
    if "$PERF_TEST" "${RAMP_ARGS[@]}" 2>&1 | tee -a "$LOG_FILE"; then
      # The report path and the fallback worker count are passed as
      # arguments rather than interpolated into the Python source.
      OPTIMAL=$(python3 -c '
import json, sys

with open(sys.argv[1]) as f:
    data = json.load(f)
rt = data.get("ramp_test", {})
print(rt.get("optimal_workers", sys.argv[2]))
' "$RAMP_OUTPUT" "$WORKERS" 2>/dev/null || echo "$WORKERS")
      log "${GREEN}Ramp test complete. Optimal workers: $OPTIMAL${NC}"
      echo -e "\n${YELLOW}Ramp test suggests $OPTIMAL workers. Current setting: $WORKERS${NC}"
      echo -e "${YELLOW}To use the suggested value, re-run with --workers $OPTIMAL${NC}\n"
    else
      log "${YELLOW}Ramp test failed, continuing with $WORKERS workers${NC}"
    fi
  fi
fi
# ─── Main Benchmark Loop ──────────────────────────────────────────────────────
# Walk the selected tiers in ladder order and run every requested mode at each
# one. The progression stops at the first perf-test.sh failure or detected
# break-point, unless --no-break turns break-points into warnings.
BENCHMARK_START=$(date +%s)
TOTAL_TIERS=${#SCALES[@]}
TIER_NUM=0
BROKEN=false
for scale in "${SCALES[@]}"; do
  TIER_NUM=$((TIER_NUM + 1))
  echo ""
  echo -e "${BOLD}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
  echo -e "${BOLD} Tier $TIER_NUM/$TOTAL_TIERS: scale=$scale${NC}"
  echo -e "${BOLD}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
  for mode in "${MODE_LIST[@]}"; do
    OUTPUT_FILE="$OUTPUT_DIR/sizing-${scale}-${mode}.json"
    MODE_LABEL=$([[ "$mode" == "seq" ]] && echo "sequential" || echo "realistic")
    log " [$scale / $MODE_LABEL] Starting..."

    # --skip-existing: reuse the stored report instead of re-running. No
    # break-point evaluation is done for reused reports.
    if $SKIP_EXISTING && [[ -f "$OUTPUT_FILE" ]]; then
      log " [$scale / $MODE_LABEL] Output exists, skipping (--skip-existing)"
      METRICS=$(extract_metrics "$OUTPUT_FILE")
      CONFIG=$(extract_config "$OUTPUT_FILE")
      add_result "$scale" "$mode" "$METRICS" "$CONFIG"
      IFS=',' read -r _total _rps _p95 _p99 _err _wall _assess <<< "$METRICS"
      echo -e " ${GREEN}[SKIP]${NC} $scale/$MODE_LABEL: ${_total} entities, ${_rps} RPS, p95=${_p95}ms, err=${_err}%, ${_assess}"
      continue
    fi

    # The perf-test.sh command line is built as an array so that values
    # containing whitespace (output directory, token) reach perf-test.sh as
    # single arguments. SHOWN_ARGS mirrors it for logging, token masked.
    PERF_ARGS=(--server "$SERVER" --workers "$WORKERS" --scale "$scale" --output "$OUTPUT_FILE")
    if [[ -n "$ADMIN_PORT" ]]; then
      PERF_ARGS+=(--admin-port "$ADMIN_PORT")
    fi
    SHOWN_ARGS=("${PERF_ARGS[@]}")
    if [[ -n "$TOKEN" ]]; then
      PERF_ARGS+=(--token "$TOKEN")
      SHOWN_ARGS+=(--token "***")
    fi
    if [[ "$mode" == "realistic" ]]; then
      PERF_ARGS+=(--realistic)
      SHOWN_ARGS+=(--realistic)
    fi
    if $MIXED; then
      PERF_ARGS+=(--mixed --mixed-duration "$MIXED_DURATION")
      SHOWN_ARGS+=(--mixed --mixed-duration "$MIXED_DURATION")
    fi
    log " Running: perf-test.sh ${SHOWN_ARGS[*]}"

    RUN_START=$(date +%s)
    EXIT_CODE=0
    # pipefail (set at the top of the script) makes a perf-test.sh failure
    # visible through the tee pipeline.
    "$PERF_TEST" "${PERF_ARGS[@]}" 2>&1 | tee -a "$LOG_FILE" || EXIT_CODE=$?
    RUN_END=$(date +%s)
    RUN_DURATION=$((RUN_END - RUN_START))

    # A failed run is recorded as a break-point and ends the benchmark, even
    # with --no-break.
    if [[ $EXIT_CODE -ne 0 ]]; then
      log " ${RED}[$scale / $MODE_LABEL] FAILED (exit code $EXIT_CODE) after ${RUN_DURATION}s${NC}"
      add_result "$scale" "$mode" "FAILED,0,0,0,100,${RUN_DURATION},failed" "N/A"
      BREAK_SCALE="$scale"
      BREAK_MODE="$mode"
      BREAK_REASON="perf-test.sh exited with code $EXIT_CODE"
      BROKEN=true
      break
    fi

    METRICS=$(extract_metrics "$OUTPUT_FILE")
    CONFIG=$(extract_config "$OUTPUT_FILE")
    add_result "$scale" "$mode" "$METRICS" "$CONFIG"
    IFS=',' read -r _total _rps _p95 _p99 _err _wall _assess <<< "$METRICS"
    log " ${GREEN}[$scale / $MODE_LABEL] Done:${NC} ${_total} entities, ${_rps} RPS, p95=${_p95}ms, err=${_err}%, ${_assess} (${RUN_DURATION}s)"

    if is_broken "$METRICS"; then
      # Re-derive which rule fired, in the same order is_broken tests them,
      # to produce a human-readable reason.
      if [[ "$_assess" == "undersized" ]]; then
        reason="Cluster assessed as undersized"
      elif python3 -c "exit(0 if float('$_err') > 10 else 1)" 2>/dev/null; then
        reason="Error rate ${_err}% exceeds 10% threshold"
      elif python3 -c "exit(0 if float('$_p95') > 10000 else 1)" 2>/dev/null; then
        reason="p95 latency ${_p95}ms exceeds 10s threshold"
      else
        reason="Throughput degraded >50% from previous tier"
      fi
      if $NO_BREAK; then
        echo -e "\n ${YELLOW}${BOLD}BREAK-POINT WOULD FIRE at $scale/$MODE_LABEL: $reason (--no-break, continuing)${NC}\n"
        log "BREAK-POINT SUPPRESSED at $scale/$MODE_LABEL: $reason (--no-break)"
        # Only the first suppressed break-point is kept for the report.
        if [[ -z "$BREAK_SCALE" ]]; then
          BREAK_SCALE="$scale"
          BREAK_MODE="$mode"
          BREAK_REASON="$reason (continued with --no-break)"
        fi
      else
        BREAK_SCALE="$scale"
        BREAK_MODE="$mode"
        BREAK_REASON="$reason"
        echo -e "\n ${RED}${BOLD}BREAK-POINT DETECTED at $scale/$MODE_LABEL: $BREAK_REASON${NC}\n"
        log "BREAK-POINT DETECTED at $scale/$MODE_LABEL: $BREAK_REASON"
        BROKEN=true
        break
      fi
    fi

    # Remember this run's RPS-per-entity as the baseline for the next run.
    if [[ "$_total" != "0" && "$_total" != "N/A" ]]; then
      PREV_RPS_PER_ENTITY=$(python3 -c "print(float('$_rps') / max(float('$_total'), 1))" 2>/dev/null || echo "")
    fi
  done

  if $BROKEN; then
    break
  fi

  # Tier comparison
  if [[ ${#MODE_LIST[@]} -gt 1 ]]; then
    SEQ_FILE="$OUTPUT_DIR/sizing-${scale}-seq.json"
    REAL_FILE="$OUTPUT_DIR/sizing-${scale}-realistic.json"
    if [[ -f "$SEQ_FILE" && -f "$REAL_FILE" ]]; then
      echo ""
      echo -e " ${CYAN}Tier $scale comparison:${NC}"
      SEQ_M=$(extract_metrics "$SEQ_FILE")
      REAL_M=$(extract_metrics "$REAL_FILE")
      IFS=',' read -r s_total s_rps s_p95 s_p99 s_err s_wall s_assess <<< "$SEQ_M"
      IFS=',' read -r r_total r_rps r_p95 r_p99 r_err r_wall r_assess <<< "$REAL_M"
      printf " %-12s %8s %8s %8s %8s %10s\n" "" "Entities" "RPS" "p95ms" "Errors%" "Assessment"
      printf " %-12s %8s %8s %8s %8s %10s\n" "Sequential" "$s_total" "$s_rps" "$s_p95" "${s_err}%" "$s_assess"
      printf " %-12s %8s %8s %8s %8s %10s\n" "Realistic" "$r_total" "$r_rps" "$r_p95" "${r_err}%" "$r_assess"
      echo ""
    fi
  fi
done
BENCHMARK_END=$(date +%s)
BENCHMARK_DURATION=$((BENCHMARK_END - BENCHMARK_START))
# ─── Generate SIZING-SUMMARY.md ────────────────────────────────────────────────
log "Generating $SUMMARY_MD..."
# Server details are taken from the first report found on disk, scanning the
# tiers in ladder order and the modes in the order they were requested.
SERVER_INFO=""
for scale in "${SCALES[@]}"; do
  for mode in "${MODE_LIST[@]}"; do
    F="$OUTPUT_DIR/sizing-${scale}-${mode}.json"
    [[ -f "$F" ]] || continue
    SERVER_INFO=$(extract_server_info "$F")
    # One report is enough: leave both loops.
    break 2
  done
done
# Write the Markdown report. Everything printed inside this brace group is
# redirected into $SUMMARY_MD (see the closing line); the group runs in the
# current shell, so variables assigned here remain set afterwards.
{
echo "# OpenMetadata Cluster Sizing Summary"
echo ""
echo "Generated: $(date '+%Y-%m-%d %H:%M:%S')"
echo "Total benchmark time: ${BENCHMARK_DURATION}s ($((BENCHMARK_DURATION / 60))m $((BENCHMARK_DURATION % 60))s)"
echo ""
echo "## Server Configuration"
echo ""
echo '```'
echo "Server: $SERVER"
echo "Workers: $WORKERS"
# SERVER_INFO is empty when no report file was found on disk.
if [[ -n "$SERVER_INFO" ]]; then
echo "$SERVER_INFO"
fi
echo '```'
echo ""
echo "## Progressive Results"
echo ""
echo "| Scale | Mode | Entities | RPS | p95 (ms) | p99 (ms) | Errors % | Assessment | Duration |"
echo "|-------|------|----------|-----|----------|----------|----------|------------|----------|"
# Track the most recent row assessed as adequate or marginal; it feeds the
# break-point and recommended-configuration sections further down.
LAST_ADEQUATE_SCALE=""
LAST_ADEQUATE_CONFIG=""
for line in "${RESULT_LINES[@]}"; do
# The last field (config) receives the remainder of the line, so any commas
# inside the configuration text are kept.
IFS=',' read -r scale mode total rps p95 p99 err wall assess config <<< "$line"
mode_label=$([[ "$mode" == "seq" ]] && echo "Sequential" || echo "Realistic")
# Format duration: whole-number values get an "s" suffix, anything else is
# printed unchanged.
if [[ "$wall" =~ ^[0-9]+$ ]]; then
duration="${wall}s"
else
duration="$wall"
fi
# Assessment label: only "failed" is rewritten (to FAILED); every other value
# is printed as-is.
case "$assess" in
adequate) assess_fmt="$assess" ;;
marginal) assess_fmt="$assess" ;;
undersized) assess_fmt="$assess" ;;
failed) assess_fmt="FAILED" ;;
*) assess_fmt="$assess" ;;
esac
echo "| $scale | $mode_label | $total | $rps | $p95 | $p99 | $err | $assess_fmt | $duration |"
if [[ "$assess" == "adequate" || "$assess" == "marginal" ]]; then
LAST_ADEQUATE_SCALE="$scale"
LAST_ADEQUATE_CONFIG="$config"
fi
done
echo ""
# Break-point section
if [[ -n "$BREAK_SCALE" ]]; then
echo "## Break-Point Detected"
echo ""
echo "**Scale:** $BREAK_SCALE "
echo "**Mode:** $([[ "$BREAK_MODE" == "seq" ]] && echo "Sequential" || echo "Realistic") "
echo "**Reason:** $BREAK_REASON"
echo ""
if [[ -n "$LAST_ADEQUATE_SCALE" ]]; then
echo "The cluster handled **$LAST_ADEQUATE_SCALE** scale adequately but broke at **$BREAK_SCALE**."
echo ""
fi
else
echo "## Result"
echo ""
echo "No break-point detected. The cluster handled all tested scales up to **${SCALES[${#SCALES[@]}-1]}**."
echo ""
fi
# Recommended configuration: one "export ..." line per "; "-separated entry
# of the last adequate/marginal row's config summary.
if [[ -n "$LAST_ADEQUATE_CONFIG" && "$LAST_ADEQUATE_CONFIG" != "N/A" ]]; then
echo "## Recommended Configuration"
echo ""
echo "Based on the last adequate tier ($LAST_ADEQUATE_SCALE):"
echo ""
echo '```bash'
IFS=';' read -ra CONFIGS <<< "$LAST_ADEQUATE_CONFIG"
for cfg in "${CONFIGS[@]}"; do
# xargs is used here to trim surrounding whitespace from the entry.
# NOTE(review): xargs also interprets quotes and backslashes in its input —
# confirm config entries never contain them.
cfg_trimmed=$(echo "$cfg" | xargs)
[[ -n "$cfg_trimmed" ]] && echo "export $cfg_trimmed"
done
echo '```'
echo ""
fi
# Sequential vs Realistic comparison (only when more than one mode was run)
if [[ ${#MODE_LIST[@]} -gt 1 ]]; then
echo "## Sequential vs Realistic Comparison"
echo ""
echo "| Scale | Seq RPS | Real RPS | RPS Diff | Seq p95 | Real p95 | p95 Diff |"
echo "|-------|---------|----------|----------|---------|----------|----------|"
for scale in "${SCALES[@]}"; do
SEQ_FILE="$OUTPUT_DIR/sizing-${scale}-seq.json"
REAL_FILE="$OUTPUT_DIR/sizing-${scale}-realistic.json"
# A row is produced only for tiers where both reports exist; a parse failure
# silently drops the row (stderr discarded, "|| true").
# NOTE(review): the file paths and $scale are interpolated into the Python
# source, so a single quote in the output directory would break the snippet.
if [[ -f "$SEQ_FILE" && -f "$REAL_FILE" ]]; then
python3 -c "
import json
with open('$SEQ_FILE') as f:
seq = json.load(f)
with open('$REAL_FILE') as f:
real = json.load(f)
s_rps = seq.get('overall', {}).get('overall_throughput_rps', 0)
r_rps = real.get('overall', {}).get('overall_throughput_rps', 0)
rps_diff = ((r_rps - s_rps) / s_rps * 100) if s_rps > 0 else 0
# Max p95
def max_p95(data):
mx = 0
for name, ent in data.get('entities', {}).items():
if name.startswith('read_') or name.startswith('mixed_'):
continue
p = ent.get('latency_ms', {}).get('p95', 0)
if p > mx:
mx = p
return mx
s_p95 = max_p95(seq)
r_p95 = max_p95(real)
p95_diff = ((r_p95 - s_p95) / s_p95 * 100) if s_p95 > 0 else 0
print(f'| $scale | {s_rps:.1f} | {r_rps:.1f} | {rps_diff:+.1f}% | {s_p95:.0f} | {r_p95:.0f} | {p95_diff:+.1f}% |')
" 2>/dev/null || true
fi
done
echo ""
echo "> **Sequential** runs entity types one at a time. **Realistic** runs all concurrently through a shared worker pool, exposing cross-entity contention."
echo ""
fi
# Individual tier details: one per-entity table for every report on disk.
echo "## Tier Details"
echo ""
for scale in "${SCALES[@]}"; do
for mode in "${MODE_LIST[@]}"; do
F="$OUTPUT_DIR/sizing-${scale}-${mode}.json"
[[ ! -f "$F" ]] && continue
mode_label=$([[ "$mode" == "seq" ]] && echo "Sequential" || echo "Realistic")
echo "### $scale ($mode_label)"
echo ""
# Prints the entity table plus any cluster_sizing findings; on a parse failure
# the placeholder text after "||" is written instead.
# NOTE(review): the report path is interpolated into the Python source here
# as well.
python3 -c "
import json
with open('$F') as f:
data = json.load(f)
entities = data.get('entities', {})
print('| Entity | Count | RPS | p50 | p95 | p99 | Errors % |')
print('|--------|-------|-----|-----|-----|-----|----------|')
for name, ent in sorted(entities.items()):
count = ent.get('created', ent.get('total_requests', 0))
rps = ent.get('throughput_rps', 0)
lat = ent.get('latency_ms', {})
p50 = lat.get('p50', 0)
p95 = lat.get('p95', 0)
p99 = lat.get('p99', 0)
err = ent.get('error_rate_pct', 0)
print(f'| {name} | {count} | {rps:.1f} | {p50:.0f} | {p95:.0f} | {p99:.0f} | {err:.2f} |')
# Sizing findings
sizing = data.get('cluster_sizing', {})
findings = sizing.get('findings', [])
if findings:
print()
print('**Findings:**')
for f in findings:
print(f'- {f}')
" 2>/dev/null || echo "_Failed to parse results_"
echo ""
done
done
echo "---"
echo ""
echo "Generated by \`benchmark-sizing.sh\` on $(date '+%Y-%m-%d %H:%M:%S')"
} > "$SUMMARY_MD"
# ─── Final Console Summary ─────────────────────────────────────────────────────
# Closing banner.
SUMMARY_RULE="══════════════════════════════════════════════════════════════════"
echo ""
echo -e "${BOLD}${SUMMARY_RULE}${NC}"
echo -e "${BOLD} SIZING BENCHMARK COMPLETE${NC}"
echo -e "${BOLD}${SUMMARY_RULE}${NC}"
echo ""

# Results table: the header and its underline share one column layout.
SUMMARY_HEADER_FMT=" %-8s %-12s %8s %8s %8s %8s %10s\n"
# shellcheck disable=SC2059 — the format is a fixed string defined just above
printf "$SUMMARY_HEADER_FMT" "Scale" "Mode" "Entities" "RPS" "p95ms" "Errors%" "Assessment"
# shellcheck disable=SC2059
printf "$SUMMARY_HEADER_FMT" "────────" "────────────" "────────" "────────" "────────" "────────" "──────────"
for line in "${RESULT_LINES[@]}"; do
  IFS=',' read -r scale mode total rps p95 p99 err wall assess config <<< "$line"
  if [[ "$mode" == "seq" ]]; then
    mode_label="Sequential"
  else
    mode_label="Realistic"
  fi
  # The assessment column is coloured by severity.
  case "$assess" in
    adequate) color="$GREEN" ;;
    marginal) color="$YELLOW" ;;
    undersized | failed) color="$RED" ;;
    *) color="$NC" ;;
  esac
  # The colour codes sit in the format string so printf interprets their
  # backslash escapes.
  # shellcheck disable=SC2059
  printf " %-8s %-12s %8s %8s %8s %8s ${color}%10s${NC}\n" \
    "$scale" "$mode_label" "$total" "$rps" "$p95" "${err}%" "$assess"
done
echo ""

# Verdict line(s): either the all-clear or the recorded break-point.
if [[ -z "$BREAK_SCALE" ]]; then
  echo -e " ${GREEN}No break-point detected — cluster handled all scales through ${SCALES[${#SCALES[@]}-1]}${NC}"
else
  echo -e " ${RED}${BOLD}Break-point: $BREAK_SCALE ($BREAK_MODE) — $BREAK_REASON${NC}"
  if [[ -n "$LAST_ADEQUATE_SCALE" ]]; then
    echo -e " ${GREEN}Last adequate scale: $LAST_ADEQUATE_SCALE${NC}"
  fi
fi
echo ""
echo -e " Total time: ${BENCHMARK_DURATION}s ($((BENCHMARK_DURATION / 60))m $((BENCHMARK_DURATION % 60))s)"
echo -e " Summary: ${CYAN}$SUMMARY_MD${NC}"
echo -e " Log: ${CYAN}$LOG_FILE${NC}"
echo ""