fix(reboot): open host runner controlled automation
Some checks failed
CD Pipeline / tests (push) Waiting to run
Ansible / Reboot Recovery Contract / validate (push) Successful in 1m16s
Code Review / ai-code-review (push) Successful in 22s
CD Pipeline / build-and-deploy (push) Has been cancelled
CD Pipeline / post-deploy-checks (push) Has been cancelled

This commit is contained in:
Your Name
2026-06-28 02:08:13 +08:00
parent aa41db6875
commit b6c600e24d
3 changed files with 53 additions and 16 deletions

View File

@@ -7,7 +7,7 @@ set -euo pipefail
# production host and a CI host, so CD must not start a new Docker/Next build
# while load, BuildKit, Gitea Actions, or headless smoke pressure is already high.
# This gate never kills, renices, or rewrites another repo's process tree.
# 2026-06-28 Codex: CD trigger after opening the AWOOI runner double-key guard.
# 2026-06-28 Codex: CD trigger after opening the AWOOI runner controlled automation guard.
ATTEMPTS="${HOST_WEB_BUILD_PRESSURE_ATTEMPTS:-${HOST_WEB_BUILD_PRESSURE_MAX_ATTEMPTS:-60}}"
SLEEP_SECONDS="${HOST_WEB_BUILD_PRESSURE_SLEEP_SECONDS:-${HOST_WEB_BUILD_PRESSURE_INTERVAL:-10}}"

View File

@@ -184,19 +184,25 @@ fi
# ──────────────────────────────────────────────
# STEP 6: Gitea Act Runner (CI/CD 核心)
# 2026-04-05 Claude Code: 加入 — 解決重開機後 Gitea runner 離線、CD 失效
# 2026-06-27 Codex: 110 是 production / registry / observability 主機;
# runner 預設維持停用降壓,未完成限流 / 搬遷前不可在 startup 自動拉起。
# 2026-06-27 Codex: 110 runner labels 收斂,避免接泛用 shared CI。
# 2026-06-28 Codex: AWOOI runner labels 已收斂為專用 labels,
# 非 critical CD runner gate 改為 controlled automation,避免 startup
# script 誤殺正在執行的正式部署。sentinel 僅在明確要求時作為第二鑰匙。
# ──────────────────────────────────────────────
log "[6/6] 檢查 Gitea Act Runner預設不自動啟動)..."
log "[6/6] 檢查 Gitea Act Runner預設受控啟動)..."
RUNNER_DIR="/home/wooo/act-runner"
RUNNER_SERVICE="gitea-act-runner-host.service"
RUNNER_ENABLE_SENTINEL="/run/awoooi-runner-host-enabled"
START_GITEA_RUNNER_ON_BOOT="${AWOOOI_START_GITEA_RUNNER_ON_BOOT:-0}"
RUNNER_ENABLE_SENTINEL="${AWOOOI_RUNNER_ENABLE_SENTINEL:-/run/awoooi-runner-host-enabled}"
START_GITEA_RUNNER_ON_BOOT="${AWOOOI_START_GITEA_RUNNER_ON_BOOT:-1}"
REQUIRE_RUNNER_ENABLE_SENTINEL="${AWOOOI_REQUIRE_RUNNER_ENABLE_SENTINEL:-0}"
STOP_GITEA_RUNNER_WHEN_DISABLED="${AWOOOI_STOP_GITEA_RUNNER_WHEN_DISABLED:-0}"
START_GITEA_RUNNER_ALLOWED=0
# The runtime operator sentinel is the second key for an authorized deployment
# window. A single env var or a stale sentinel alone must not reopen host CI.
if [ "$START_GITEA_RUNNER_ON_BOOT" = "1" ] && [ -e "$RUNNER_ENABLE_SENTINEL" ]; then
START_GITEA_RUNNER_ALLOWED=1
if [ "$START_GITEA_RUNNER_ON_BOOT" = "1" ]; then
if [ "$REQUIRE_RUNNER_ENABLE_SENTINEL" = "1" ] && [ ! -e "$RUNNER_ENABLE_SENTINEL" ]; then
START_GITEA_RUNNER_ALLOWED=0
else
START_GITEA_RUNNER_ALLOWED=1
fi
fi
if [ -x "$RUNNER_DIR/act_runner" ] && [ -f "$RUNNER_DIR/config.yaml" ]; then
# 若舊的 .runner 配置指向過期 hostname,只有在明確允許啟動 runner
@@ -265,14 +271,19 @@ PY
nohup "$RUNNER_DIR/run-host-runner.sh" >> "$RUNNER_DIR/host-runner.log" 2>&1 &
fi
else
if [ "$START_GITEA_RUNNER_ON_BOOT" = "1" ]; then
log "⛔ AWOOOI_START_GITEA_RUNNER_ON_BOOT=1 但缺少 $RUNNER_ENABLE_SENTINELrunner fail-closed"
if [ "$START_GITEA_RUNNER_ON_BOOT" = "1" ] && [ "$REQUIRE_RUNNER_ENABLE_SENTINEL" = "1" ]; then
# Report that boot-time start was requested but the required sentinel file is
# absent. The expansion is braced so the following text cannot be absorbed into
# the variable name (which would print an empty/unset variable instead of the
# sentinel path).
log "⛔ AWOOOI_START_GITEA_RUNNER_ON_BOOT=1 但缺少 ${RUNNER_ENABLE_SENTINEL},runner startup 暫停"
else
log "⏸️ Gitea host runner 維持停用;需同時設定 AWOOOI_START_GITEA_RUNNER_ON_BOOT=1 與建立 $RUNNER_ENABLE_SENTINEL 才允許 startup 啟動"
log "⏸️ Gitea host runner 本次不啟動;AWOOOI_START_GITEA_RUNNER_ON_BOOT=1 可重新打開"
fi
# Runner start was not allowed for this boot. Only tear down an existing runner
# when STOP_GITEA_RUNNER_WHEN_DISABLED is "1" (populated from
# AWOOOI_STOP_GITEA_RUNNER_WHEN_DISABLED); otherwise leave it untouched.
if [ "$STOP_GITEA_RUNNER_WHEN_DISABLED" = "1" ]; then
  # The message must name the env var that is actually read (AWOOOI_ prefix),
  # so operators can find the switch that triggered the stop.
  log "⚠️ AWOOOI_STOP_GITEA_RUNNER_WHEN_DISABLED=1停止 runner"
  # Best effort: every step tolerates failure (unit missing, nothing running).
  systemctl disable --now "$RUNNER_SERVICE" >/dev/null 2>&1 || true
  systemctl kill -s SIGKILL "$RUNNER_SERVICE" >/dev/null 2>&1 || true
  # Also kill any daemon process matching the runner's command line.
  pkill -KILL -f "$RUNNER_DIR/act_runner daemon" >/dev/null 2>&1 || true
else
  log "✅ 不停止既有 runner避免中斷正在執行的 CD / post-deploy job"
fi
systemctl disable --now "$RUNNER_SERVICE" >/dev/null 2>&1 || true
systemctl kill -s SIGKILL "$RUNNER_SERVICE" >/dev/null 2>&1 || true
pkill -KILL -f "$RUNNER_DIR/act_runner daemon" >/dev/null 2>&1 || true
fi
# 已停用 Docker-wrapped runner避免它搶走 host label job。