fix(reboot): bound post-reboot summary runtime in SLO exporter
Some checks failed
CD Pipeline / workflow-shape (push) Successful in 1s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Successful in 1m3s
CD Pipeline / build-and-deploy (push) Successful in 5m31s
CD Pipeline / post-deploy-checks (push) Has been cancelled

This commit is contained in:
Your Name
2026-07-03 02:18:46 +08:00
parent aa5eddbe33
commit 502775460c
2 changed files with 20 additions and 2 deletions

View File

@@ -16,6 +16,7 @@ LOCK_FILE="${LOCK_FILE:-${LOG_DIR}/reboot_auto_recovery_slo.lock}"
# Endpoint defaults; each can be overridden from the environment.
STOCK_FRESHNESS_URL="${STOCK_FRESHNESS_URL:-https://stock.wooo.work/api/v1/system/freshness}"
STOCK_INGESTION_URL="${STOCK_INGESTION_URL:-https://stock.wooo.work/api/v1/system/ingestion}"
# Passed to curl as --max-time for the stock readback request (default 10).
STOCK_READBACK_TIMEOUT_SECONDS="${STOCK_READBACK_TIMEOUT_SECONDS:-10}"
# Time limit, in seconds, handed to timeout/gtimeout when running
# post-reboot-readiness-summary.sh (default 60).
POST_REBOOT_READINESS_TIMEOUT_SECONDS="${POST_REBOOT_READINESS_TIMEOUT_SECONDS:-60}"
# NOTE(review): presumably a per-request limit for the public maintenance
# readback; the consuming code is not visible here -- confirm.
PUBLIC_MAINTENANCE_READBACK_TIMEOUT_SECONDS="${PUBLIC_MAINTENANCE_READBACK_TIMEOUT_SECONDS:-8}"
# Space-separated list of URLs (default covers six public endpoints).
PUBLIC_MAINTENANCE_URLS="${PUBLIC_MAINTENANCE_URLS:-https://awoooi.wooo.work/api/v1/health https://awoooi.wooo.work/ https://stock.wooo.work/api/v1/system/freshness https://mo.wooo.work/health https://bitan.wooo.work/ https://www.tsenyang.com/}"
@@ -50,8 +51,23 @@ python3 "$ROOT_DIR/scripts/reboot-recovery/reboot-event-detector.py" \
--target-minutes "$TARGET_MINUTES" \
--output "$reboot_event_file" \
--prometheus-output "$reboot_event_prom" || true
ARTIFACT_DIR="$artifact_dir/post-reboot-readiness" \
bash "$ROOT_DIR/scripts/reboot-recovery/post-reboot-readiness-summary.sh" --no-color >"$summary_file" 2>&1 || true
# Run the post-reboot readiness summary and capture its combined
# stdout/stderr in "$summary_file". The run is best-effort: a failure of the
# summary script must never abort the exporter.
#
# When a `timeout` binary is available (`timeout`, or `gtimeout` as a
# fallback), the run is bounded by POST_REBOOT_READINESS_TIMEOUT_SECONDS.
# The marker line POST_REBOOT_READINESS_SUMMARY_TIMEOUT=1 is appended only
# when the time limit actually expired (exit status 124), so an ordinary
# non-zero exit of the summary script is not misreported as a timeout.
post_reboot_summary_command=(
  bash "$ROOT_DIR/scripts/reboot-recovery/post-reboot-readiness-summary.sh" --no-color
)

# Pick the first available timeout implementation; empty means "unbounded".
post_reboot_summary_timeout_bin=""
if command -v timeout >/dev/null 2>&1; then
  post_reboot_summary_timeout_bin="timeout"
elif command -v gtimeout >/dev/null 2>&1; then
  post_reboot_summary_timeout_bin="gtimeout"
fi

if [ -n "$post_reboot_summary_timeout_bin" ]; then
  # `cmd || status=$?` records the exit status without tripping `set -e`.
  post_reboot_summary_status=0
  ARTIFACT_DIR="$artifact_dir/post-reboot-readiness" \
    "$post_reboot_summary_timeout_bin" "${POST_REBOOT_READINESS_TIMEOUT_SECONDS}s" \
    "${post_reboot_summary_command[@]}" \
    >"$summary_file" 2>&1 \
    || post_reboot_summary_status=$?
  # 124 is the status timeout(1) reports when the time limit was reached.
  if [ "$post_reboot_summary_status" -eq 124 ]; then
    printf 'POST_REBOOT_READINESS_SUMMARY_TIMEOUT=1\n' >>"$summary_file"
  fi
else
  # No timeout binary available: run unbounded, still best-effort.
  ARTIFACT_DIR="$artifact_dir/post-reboot-readiness" \
    "${post_reboot_summary_command[@]}" >"$summary_file" 2>&1 || true
fi
if command -v curl >/dev/null 2>&1; then
curl -fsS --max-time "$STOCK_READBACK_TIMEOUT_SECONDS" \

View File

@@ -107,6 +107,8 @@ def test_exporter_projects_each_scorecard_blocker_to_textfile_metric() -> None:
assert "WINDOWS99_MAX_AUTH_USERS=\"${WINDOWS99_MAX_AUTH_USERS:-5}\"" in text
assert "AWOOOI_WINDOWS99_VMWARE_AUTOSTART=1" in text
assert "scorecard_args+=(--windows99-vmware-file" in text
assert "POST_REBOOT_READINESS_TIMEOUT_SECONDS" in text
assert "POST_REBOOT_READINESS_SUMMARY_TIMEOUT=1" in text
assert "active_blocker_metrics" in text
assert 'for blocker in payload.get("active_blockers") or []' in text
assert "awoooi_reboot_auto_recovery_slo_active_blocker" in text