fix(ci): fail closed host runner pressure guards [skip ci]
This commit is contained in:
@@ -10,14 +10,12 @@ set -euo pipefail
|
||||
# 2026-06-28 Codex: CD trigger after opening the AWOOOI direct runner warn-only guard.
|
||||
# 2026-06-28 Codex: non-behavior trigger after restoring the quarantined runner binary.
|
||||
# 2026-06-28 Codex: non-behavior trigger after increasing API test container memory.
|
||||
# 2026-06-28 Codex: commander authorization opens this non-mutating pressure
|
||||
# guard to one-shot evidence + warn-only by default. Set env vars explicitly
|
||||
# when an incident window needs stricter host protection. Destructive/data/
|
||||
# secrets blockers remain outside this pressure check.
|
||||
# 2026-06-28 Codex: host 110 runner pressure remains an incident-grade guard.
|
||||
# Controlled apply is open, but this pressure gate stays fail-closed by default.
|
||||
|
||||
ATTEMPTS="${HOST_WEB_BUILD_PRESSURE_ATTEMPTS:-${HOST_WEB_BUILD_PRESSURE_MAX_ATTEMPTS:-1}}"
|
||||
SLEEP_SECONDS="${HOST_WEB_BUILD_PRESSURE_SLEEP_SECONDS:-${HOST_WEB_BUILD_PRESSURE_INTERVAL:-3}}"
|
||||
WARN_ONLY="${HOST_WEB_BUILD_PRESSURE_WARN_ONLY:-1}"
|
||||
ATTEMPTS="${HOST_WEB_BUILD_PRESSURE_ATTEMPTS:-${HOST_WEB_BUILD_PRESSURE_MAX_ATTEMPTS:-60}}"
|
||||
SLEEP_SECONDS="${HOST_WEB_BUILD_PRESSURE_SLEEP_SECONDS:-${HOST_WEB_BUILD_PRESSURE_INTERVAL:-10}}"
|
||||
WARN_ONLY="${HOST_WEB_BUILD_PRESSURE_WARN_ONLY:-0}"
|
||||
MAX_LOAD5_PER_CORE="${HOST_WEB_BUILD_PRESSURE_MAX_LOAD5_PER_CORE:-0.85}"
|
||||
MAX_CI_CPU_PERCENT="${HOST_WEB_BUILD_PRESSURE_MAX_CI_CPU_PERCENT:-250}"
|
||||
# One Gitea Actions task container/process group is the current job itself.
|
||||
@@ -187,7 +185,7 @@ done
|
||||
|
||||
echo "⚠️ host web/build/smoke pressure still active after ${ATTEMPTS} checks"
|
||||
if [ "$WARN_ONLY" = "1" ]; then
|
||||
echo "⚠️ continuing under commander controlled automation; pressure evidence was captured"
|
||||
echo "⚠️ continuing to avoid a stuck deploy; see ops/runner/README.md for the runner isolation plan"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
|
||||
@@ -185,24 +185,19 @@ fi
|
||||
# STEP 6: Gitea Act Runner(CI/CD 核心)
|
||||
# 2026-04-05 Claude Code: 加入 — 解決重開機後 Gitea runner 離線、CD 失效
|
||||
# 2026-06-27 Codex: 110 runner labels 收斂,避免接泛用 shared CI。
|
||||
# 2026-06-28 Codex: AWOOOI runner labels 已收斂為專用 labels;
|
||||
# 非 critical CD runner gate 改為 controlled automation,避免 startup
|
||||
# script 誤殺正在執行的正式部署。sentinel 僅在明確要求時作為第二鑰匙。
|
||||
# 2026-06-27 Codex: 110 是 production / registry / observability 主機;
|
||||
# runner 預設維持停用降壓,未完成限流 / 搬遷前不可在 startup 自動拉起。
|
||||
# ──────────────────────────────────────────────
|
||||
log "[6/6] 檢查 Gitea Act Runner(預設受控啟動)..."
|
||||
log "[6/6] 檢查 Gitea Act Runner(預設不自動啟動)..."
|
||||
RUNNER_DIR="/home/wooo/act-runner"
|
||||
RUNNER_SERVICE="gitea-act-runner-host.service"
|
||||
RUNNER_ENABLE_SENTINEL="${AWOOOI_RUNNER_ENABLE_SENTINEL:-/run/awoooi-runner-host-enabled}"
|
||||
START_GITEA_RUNNER_ON_BOOT="${AWOOOI_START_GITEA_RUNNER_ON_BOOT:-1}"
|
||||
REQUIRE_RUNNER_ENABLE_SENTINEL="${AWOOOI_REQUIRE_RUNNER_ENABLE_SENTINEL:-0}"
|
||||
STOP_GITEA_RUNNER_WHEN_DISABLED="${AWOOOI_STOP_GITEA_RUNNER_WHEN_DISABLED:-0}"
|
||||
RUNNER_ENABLE_SENTINEL="/run/awoooi-runner-host-enabled"
|
||||
START_GITEA_RUNNER_ON_BOOT="${AWOOOI_START_GITEA_RUNNER_ON_BOOT:-0}"
|
||||
START_GITEA_RUNNER_ALLOWED=0
|
||||
if [ "$START_GITEA_RUNNER_ON_BOOT" = "1" ]; then
|
||||
if [ "$REQUIRE_RUNNER_ENABLE_SENTINEL" = "1" ] && [ ! -e "$RUNNER_ENABLE_SENTINEL" ]; then
|
||||
START_GITEA_RUNNER_ALLOWED=0
|
||||
else
|
||||
START_GITEA_RUNNER_ALLOWED=1
|
||||
fi
|
||||
# The runtime operator sentinel is the second key for an authorized deployment
|
||||
# window. A single env var or a stale sentinel alone must not reopen host CI.
|
||||
if [ "$START_GITEA_RUNNER_ON_BOOT" = "1" ] && [ -e "$RUNNER_ENABLE_SENTINEL" ]; then
|
||||
START_GITEA_RUNNER_ALLOWED=1
|
||||
fi
|
||||
if [ -x "$RUNNER_DIR/act_runner" ] && [ -f "$RUNNER_DIR/config.yaml" ]; then
|
||||
# 若舊的 .runner 配置指向過期 hostname,只有在明確允許啟動 runner
|
||||
@@ -271,19 +266,14 @@ PY
|
||||
nohup "$RUNNER_DIR/run-host-runner.sh" >> "$RUNNER_DIR/host-runner.log" 2>&1 &
|
||||
fi
|
||||
else
|
||||
if [ "$START_GITEA_RUNNER_ON_BOOT" = "1" ] && [ "$REQUIRE_RUNNER_ENABLE_SENTINEL" = "1" ]; then
|
||||
log "⛔ AWOOOI_START_GITEA_RUNNER_ON_BOOT=1 但缺少 $RUNNER_ENABLE_SENTINEL;runner startup 暫停"
|
||||
if [ "$START_GITEA_RUNNER_ON_BOOT" = "1" ]; then
|
||||
log "⛔ AWOOOI_START_GITEA_RUNNER_ON_BOOT=1 但缺少 $RUNNER_ENABLE_SENTINEL;runner fail-closed"
|
||||
else
|
||||
log "⏸️ Gitea host runner 本次不啟動;AWOOOI_START_GITEA_RUNNER_ON_BOOT=1 可重新打開"
|
||||
fi
|
||||
if [ "$STOP_GITEA_RUNNER_WHEN_DISABLED" = "1" ]; then
|
||||
# Report the exact env var that feeds STOP_GITEA_RUNNER_WHEN_DISABLED
# (AWOOOI_ prefix, three O's) so the name in the log can be grepped/set as-is.
log "⚠️ AWOOOI_STOP_GITEA_RUNNER_WHEN_DISABLED=1,停止 runner"
|
||||
systemctl disable --now "$RUNNER_SERVICE" >/dev/null 2>&1 || true
|
||||
systemctl kill -s SIGKILL "$RUNNER_SERVICE" >/dev/null 2>&1 || true
|
||||
pkill -KILL -f "$RUNNER_DIR/act_runner daemon" >/dev/null 2>&1 || true
|
||||
else
|
||||
log "✅ 不停止既有 runner;避免中斷正在執行的 CD / post-deploy job"
|
||||
log "⏸️ Gitea host runner 維持停用;需同時設定 AWOOOI_START_GITEA_RUNNER_ON_BOOT=1 與建立 $RUNNER_ENABLE_SENTINEL 才允許 startup 啟動"
|
||||
fi
|
||||
systemctl disable --now "$RUNNER_SERVICE" >/dev/null 2>&1 || true
|
||||
systemctl kill -s SIGKILL "$RUNNER_SERVICE" >/dev/null 2>&1 || true
|
||||
pkill -KILL -f "$RUNNER_DIR/act_runner daemon" >/dev/null 2>&1 || true
|
||||
fi
|
||||
|
||||
# 已停用 Docker-wrapped runner;避免它搶走 host label job。
|
||||
|
||||
Reference in New Issue
Block a user