fix(ci): fail closed host runner pressure guards [skip ci]

This commit is contained in:
Your Name
2026-06-28 08:38:15 +08:00
parent e090cb21e9
commit 54eea881e3
3 changed files with 32 additions and 46 deletions

View File

@@ -10,14 +10,12 @@ set -euo pipefail
# 2026-06-28 Codex: CD trigger after opening the AWOOI direct runner warn-only guard.
# 2026-06-28 Codex: non-behavior trigger after restoring the quarantined runner binary.
# 2026-06-28 Codex: non-behavior trigger after increasing API test container memory.
# 2026-06-28 Codex: commander authorization opens this non-mutating pressure
# guard to one-shot evidence + warn-only by default. Set env vars explicitly
# when an incident window needs stricter host protection. Destructive/data/
# secrets blockers remain outside this pressure check.
# 2026-06-28 Codex: host 110 runner pressure remains an incident-grade guard.
# Controlled apply is open, but this pressure gate stays fail-closed by default.
ATTEMPTS="${HOST_WEB_BUILD_PRESSURE_ATTEMPTS:-${HOST_WEB_BUILD_PRESSURE_MAX_ATTEMPTS:-1}}"
SLEEP_SECONDS="${HOST_WEB_BUILD_PRESSURE_SLEEP_SECONDS:-${HOST_WEB_BUILD_PRESSURE_INTERVAL:-3}}"
WARN_ONLY="${HOST_WEB_BUILD_PRESSURE_WARN_ONLY:-1}"
ATTEMPTS="${HOST_WEB_BUILD_PRESSURE_ATTEMPTS:-${HOST_WEB_BUILD_PRESSURE_MAX_ATTEMPTS:-60}}"
SLEEP_SECONDS="${HOST_WEB_BUILD_PRESSURE_SLEEP_SECONDS:-${HOST_WEB_BUILD_PRESSURE_INTERVAL:-10}}"
WARN_ONLY="${HOST_WEB_BUILD_PRESSURE_WARN_ONLY:-0}"
MAX_LOAD5_PER_CORE="${HOST_WEB_BUILD_PRESSURE_MAX_LOAD5_PER_CORE:-0.85}"
MAX_CI_CPU_PERCENT="${HOST_WEB_BUILD_PRESSURE_MAX_CI_CPU_PERCENT:-250}"
# One Gitea Actions task container/process group is the current job itself.
@@ -187,7 +185,7 @@ done
echo "⚠️ host web/build/smoke pressure still active after ${ATTEMPTS} checks"
if [ "$WARN_ONLY" = "1" ]; then
echo "⚠️ continuing under commander controlled automation; pressure evidence was captured"
echo "⚠️ continuing to avoid a stuck deploy; see ops/runner/README.md for the runner isolation plan"
exit 0
fi

View File

@@ -185,24 +185,19 @@ fi
# STEP 6: Gitea Act Runner (CI/CD 核心)
# 2026-04-05 Claude Code: 加入 — 解決重開機後 Gitea runner 離線、CD 失效
# 2026-06-27 Codex: 110 runner labels 收斂,避免接泛用 shared CI。
# 2026-06-28 Codex: AWOOI runner labels 已收斂為專用 labels;
# 非 critical CD runner gate 改為 controlled automation,避免 startup
# script 誤殺正在執行的正式部署。sentinel 僅在明確要求時作為第二鑰匙。
# 2026-06-27 Codex: 110 是 production / registry / observability 主機
# runner 預設維持停用降壓,未完成限流 / 搬遷前不可在 startup 自動拉起。
# ──────────────────────────────────────────────
log "[6/6] 檢查 Gitea Act Runner預設受控啟動)..."
log "[6/6] 檢查 Gitea Act Runner預設不自動啟動)..."
RUNNER_DIR="/home/wooo/act-runner"
RUNNER_SERVICE="gitea-act-runner-host.service"
RUNNER_ENABLE_SENTINEL="${AWOOOI_RUNNER_ENABLE_SENTINEL:-/run/awoooi-runner-host-enabled}"
START_GITEA_RUNNER_ON_BOOT="${AWOOOI_START_GITEA_RUNNER_ON_BOOT:-1}"
REQUIRE_RUNNER_ENABLE_SENTINEL="${AWOOOI_REQUIRE_RUNNER_ENABLE_SENTINEL:-0}"
STOP_GITEA_RUNNER_WHEN_DISABLED="${AWOOOI_STOP_GITEA_RUNNER_WHEN_DISABLED:-0}"
RUNNER_ENABLE_SENTINEL="/run/awoooi-runner-host-enabled"
START_GITEA_RUNNER_ON_BOOT="${AWOOOI_START_GITEA_RUNNER_ON_BOOT:-0}"
START_GITEA_RUNNER_ALLOWED=0
if [ "$START_GITEA_RUNNER_ON_BOOT" = "1" ]; then
if [ "$REQUIRE_RUNNER_ENABLE_SENTINEL" = "1" ] && [ ! -e "$RUNNER_ENABLE_SENTINEL" ]; then
START_GITEA_RUNNER_ALLOWED=0
else
START_GITEA_RUNNER_ALLOWED=1
fi
# The runtime operator sentinel is the second key for an authorized deployment
# window. A single env var or a stale sentinel alone must not reopen host CI.
if [ "$START_GITEA_RUNNER_ON_BOOT" = "1" ] && [ -e "$RUNNER_ENABLE_SENTINEL" ]; then
START_GITEA_RUNNER_ALLOWED=1
fi
if [ -x "$RUNNER_DIR/act_runner" ] && [ -f "$RUNNER_DIR/config.yaml" ]; then
# 若舊的 .runner 配置指向過期 hostname,只有在明確允許啟動 runner
@@ -271,19 +266,14 @@ PY
nohup "$RUNNER_DIR/run-host-runner.sh" >> "$RUNNER_DIR/host-runner.log" 2>&1 &
fi
else
if [ "$START_GITEA_RUNNER_ON_BOOT" = "1" ] && [ "$REQUIRE_RUNNER_ENABLE_SENTINEL" = "1" ]; then
log "⛔ AWOOOI_START_GITEA_RUNNER_ON_BOOT=1 但缺少 $RUNNER_ENABLE_SENTINELrunner startup 暫停"
if [ "$START_GITEA_RUNNER_ON_BOOT" = "1" ]; then
log "⛔ AWOOOI_START_GITEA_RUNNER_ON_BOOT=1 但缺少 $RUNNER_ENABLE_SENTINELrunner fail-closed"
else
log "⏸️ Gitea host runner 本次不啟動;AWOOOI_START_GITEA_RUNNER_ON_BOOT=1 可重新打開"
fi
if [ "$STOP_GITEA_RUNNER_WHEN_DISABLED" = "1" ]; then
log "⚠️ AWOOI_STOP_GITEA_RUNNER_WHEN_DISABLED=1停止 runner"
systemctl disable --now "$RUNNER_SERVICE" >/dev/null 2>&1 || true
systemctl kill -s SIGKILL "$RUNNER_SERVICE" >/dev/null 2>&1 || true
pkill -KILL -f "$RUNNER_DIR/act_runner daemon" >/dev/null 2>&1 || true
else
log "✅ 不停止既有 runner避免中斷正在執行的 CD / post-deploy job"
log "⏸️ Gitea host runner 維持停用;需同時設定 AWOOOI_START_GITEA_RUNNER_ON_BOOT=1 與建立 $RUNNER_ENABLE_SENTINEL 才允許 startup 啟動"
fi
systemctl disable --now "$RUNNER_SERVICE" >/dev/null 2>&1 || true
systemctl kill -s SIGKILL "$RUNNER_SERVICE" >/dev/null 2>&1 || true
pkill -KILL -f "$RUNNER_DIR/act_runner daemon" >/dev/null 2>&1 || true
fi
# 已停用 Docker-wrapped runner,避免它搶走 host label job。