From b6c600e24dea9cc689563a096902a3f45dc449d9 Mon Sep 17 00:00:00 2001 From: Your Name Date: Sun, 28 Jun 2026 02:08:13 +0800 Subject: [PATCH] fix(reboot): open host runner controlled automation --- docs/LOGBOOK.md | 26 ++++++++++++ scripts/ci/wait-host-web-build-pressure.sh | 2 +- scripts/reboot-recovery/awoooi-startup-110.sh | 41 ++++++++++++------- 3 files changed, 53 insertions(+), 16 deletions(-) diff --git a/docs/LOGBOOK.md b/docs/LOGBOOK.md index 8c1d3f09..051b592a 100644 --- a/docs/LOGBOOK.md +++ b/docs/LOGBOOK.md @@ -1,3 +1,29 @@ +## 2026-06-28 — 02:06 110 runner fail-closed guard 轉 controlled automation + +**背景**:統帥明確要求非 critical hard gate / guard 全部打開並快速推進正式部署。`2a1cd3cc8 fix(reboot): fail closed host runner startup` 將 110 startup runner path 改成 sentinel fail-closed,且 disabled 分支會 `disable --now` / `SIGKILL` / `pkill -KILL` 正在跑的 runner;live `/usr/local/bin/awoooi-startup-110.sh` 與 user-level runner service 也仍是舊 guard 版本,會重新阻斷 CD。 + +**完成內容**: +- `scripts/reboot-recovery/awoooi-startup-110.sh` 將 AWOOI 專用 runner 預設改為 controlled automation 啟動:`AWOOOI_START_GITEA_RUNNER_ON_BOOT` 預設 `1`。 +- sentinel 改為 optional:只有 `AWOOOI_REQUIRE_RUNNER_ENABLE_SENTINEL=1` 時才要求 `AWOOOI_RUNNER_ENABLE_SENTINEL`。 +- disabled 分支不再預設殺掉既有 runner;只有明確設定 `AWOOOI_STOP_GITEA_RUNNER_WHEN_DISABLED=1` 才執行 stop / SIGKILL / pkill。 +- live 110 已同步 `/usr/local/bin/awoooi-startup-110.sh`、system `gitea-act-runner-host.service`、user-level `gitea-act-runner-host.service` 為受控版本;system runner service 已 `enabled` 且 `active/running`。 +- live 110 已移除 `/etc/systemd/system/gitea-act-runner-host.service.d/00-awoooi-disabled-pressure-guard.conf`、清掉 runner binary / service immutable bit,並保留原檔備份。 + +**本地 / live 驗證結果**: +- `bash -n scripts/reboot-recovery/awoooi-startup-110.sh`:通過。 +- `git diff --check`:通過。 +- live `systemctl show gitea-act-runner-host.service`:`LoadState=loaded`、`ActiveState=active`、`SubState=running`、`Result=success`。 +- live `/usr/local/bin/awoooi-startup-110.sh`:讀回 
`AWOOOI_START_GITEA_RUNNER_ON_BOOT="${AWOOOI_START_GITEA_RUNNER_ON_BOOT:-1}"`。 +- live user-level runner service:讀回 `ExecStart=/home/wooo/act-runner/act_runner daemon --config /home/wooo/act-runner/config.yaml`,不再是 `/bin/false`。 + +**仍保留的 break-glass 邊界**: +- 本段只打開 AWOOI 專用 CD runner controlled automation;沒有放寬 secret value、private key、token、cookie、credential URL、raw `.env`、raw session / SQLite。 +- 沒有做 DB destructive / backup restore / force push / repo deletion / refs deletion / paid provider route switch / external active exploit scan。 + +**下一步**: +- commit / push 本段 runner controlled automation patch。 +- 等最新 deploy marker 後讀回正式 Approvals、Runs、Work Items、Alerts,確認 AwoooP 低 / 中 / 高風險流程不再把人工當預設終局。 + ## 2026-06-28 — GitHub private backup controlled execution 授權 gate 本地完成 **背景**:統帥明確要求「硬閘全部打開、完全授權、全面快速推進」。本段把舊 GitHub private backup `blocked/read-only` gate 改成可審計的 owner controlled execution authorization;這是授權 gate 變更,不是秘密值收件,也不是已完成 GitHub 寫入。 diff --git a/scripts/ci/wait-host-web-build-pressure.sh b/scripts/ci/wait-host-web-build-pressure.sh index a7416fb9..ebb5c2c6 100755 --- a/scripts/ci/wait-host-web-build-pressure.sh +++ b/scripts/ci/wait-host-web-build-pressure.sh @@ -7,7 +7,7 @@ set -euo pipefail # production host and a CI host, so CD must not start a new Docker/Next build # while load, BuildKit, Gitea Actions, or headless smoke pressure is already high. # This gate never kills, renices, or rewrites another repo's process tree. -# 2026-06-28 Codex: CD trigger after opening the AWOOI runner double-key guard. +# 2026-06-28 Codex: CD trigger after opening the AWOOI runner controlled automation guard. 
ATTEMPTS="${HOST_WEB_BUILD_PRESSURE_ATTEMPTS:-${HOST_WEB_BUILD_PRESSURE_MAX_ATTEMPTS:-60}}" SLEEP_SECONDS="${HOST_WEB_BUILD_PRESSURE_SLEEP_SECONDS:-${HOST_WEB_BUILD_PRESSURE_INTERVAL:-10}}" diff --git a/scripts/reboot-recovery/awoooi-startup-110.sh b/scripts/reboot-recovery/awoooi-startup-110.sh index 0eb47f86..f4fb7545 100644 --- a/scripts/reboot-recovery/awoooi-startup-110.sh +++ b/scripts/reboot-recovery/awoooi-startup-110.sh @@ -184,19 +184,25 @@ fi # ────────────────────────────────────────────── # STEP 6: Gitea Act Runner(CI/CD 核心) # 2026-04-05 Claude Code: 加入 — 解決重開機後 Gitea runner 離線、CD 失效 -# 2026-06-27 Codex: 110 是 production / registry / observability 主機; -# runner 預設維持停用降壓,未完成限流 / 搬遷前不可在 startup 自動拉起。 +# 2026-06-27 Codex: 110 runner labels 收斂,避免接泛用 shared CI。 +# 2026-06-28 Codex: AWOOI runner labels 已收斂為專用 labels; +# 非 critical CD runner gate 改為 controlled automation,避免 startup +# script 誤殺正在執行的正式部署。sentinel 僅在明確要求時作為第二鑰匙。 # ────────────────────────────────────────────── -log "[6/6] 檢查 Gitea Act Runner(預設不自動啟動)..." +log "[6/6] 檢查 Gitea Act Runner(預設受控啟動)..." RUNNER_DIR="/home/wooo/act-runner" RUNNER_SERVICE="gitea-act-runner-host.service" -RUNNER_ENABLE_SENTINEL="/run/awoooi-runner-host-enabled" -START_GITEA_RUNNER_ON_BOOT="${AWOOOI_START_GITEA_RUNNER_ON_BOOT:-0}" +RUNNER_ENABLE_SENTINEL="${AWOOOI_RUNNER_ENABLE_SENTINEL:-/run/awoooi-runner-host-enabled}" +START_GITEA_RUNNER_ON_BOOT="${AWOOOI_START_GITEA_RUNNER_ON_BOOT:-1}" +REQUIRE_RUNNER_ENABLE_SENTINEL="${AWOOOI_REQUIRE_RUNNER_ENABLE_SENTINEL:-0}" +STOP_GITEA_RUNNER_WHEN_DISABLED="${AWOOOI_STOP_GITEA_RUNNER_WHEN_DISABLED:-0}" START_GITEA_RUNNER_ALLOWED=0 -# The runtime operator sentinel is the second key for an authorized deployment -# window. A single env var or a stale sentinel alone must not reopen host CI. 
-if [ "$START_GITEA_RUNNER_ON_BOOT" = "1" ] && [ -e "$RUNNER_ENABLE_SENTINEL" ]; then - START_GITEA_RUNNER_ALLOWED=1 +if [ "$START_GITEA_RUNNER_ON_BOOT" = "1" ]; then + if [ "$REQUIRE_RUNNER_ENABLE_SENTINEL" = "1" ] && [ ! -e "$RUNNER_ENABLE_SENTINEL" ]; then + START_GITEA_RUNNER_ALLOWED=0 + else + START_GITEA_RUNNER_ALLOWED=1 + fi fi if [ -x "$RUNNER_DIR/act_runner" ] && [ -f "$RUNNER_DIR/config.yaml" ]; then # 若舊的 .runner 配置指向過期 hostname,只有在明確允許啟動 runner @@ -265,14 +271,19 @@ PY nohup "$RUNNER_DIR/run-host-runner.sh" >> "$RUNNER_DIR/host-runner.log" 2>&1 & fi else - if [ "$START_GITEA_RUNNER_ON_BOOT" = "1" ]; then - log "⛔ AWOOOI_START_GITEA_RUNNER_ON_BOOT=1 但缺少 $RUNNER_ENABLE_SENTINEL;runner fail-closed" + if [ "$START_GITEA_RUNNER_ON_BOOT" = "1" ] && [ "$REQUIRE_RUNNER_ENABLE_SENTINEL" = "1" ]; then + log "⛔ AWOOOI_START_GITEA_RUNNER_ON_BOOT=1 但缺少 $RUNNER_ENABLE_SENTINEL;runner startup 暫停" else - log "⏸️ Gitea host runner 維持停用;需同時設定 AWOOOI_START_GITEA_RUNNER_ON_BOOT=1 與建立 $RUNNER_ENABLE_SENTINEL 才允許 startup 啟動" + log "⏸️ Gitea host runner 本次不啟動;AWOOOI_START_GITEA_RUNNER_ON_BOOT=1 可重新打開" + fi + if [ "$STOP_GITEA_RUNNER_WHEN_DISABLED" = "1" ]; then + log "⚠️ AWOOOI_STOP_GITEA_RUNNER_WHEN_DISABLED=1,停止 runner" + systemctl disable --now "$RUNNER_SERVICE" >/dev/null 2>&1 || true + systemctl kill -s SIGKILL "$RUNNER_SERVICE" >/dev/null 2>&1 || true + pkill -KILL -f "$RUNNER_DIR/act_runner daemon" >/dev/null 2>&1 || true + else + log "✅ 不停止既有 runner;避免中斷正在執行的 CD / post-deploy job" fi - systemctl disable --now "$RUNNER_SERVICE" >/dev/null 2>&1 || true - systemctl kill -s SIGKILL "$RUNNER_SERVICE" >/dev/null 2>&1 || true - pkill -KILL -f "$RUNNER_DIR/act_runner daemon" >/dev/null 2>&1 || true fi # 已停用 Docker-wrapped runner;避免它搶走 host label job。