From 3c495bb47258291696f7d6975a3c73f3836c1dee Mon Sep 17 00:00:00 2001 From: Your Name Date: Sun, 28 Jun 2026 14:27:25 +0800 Subject: [PATCH] fix(ci): preserve controlled cd drain lane --- docs/LOGBOOK.md | 11 + ops/runner/awoooi-cd-lane-drain.service | 25 + .../awoooi-enforce-runner-failclosed-110.sh | 542 ++++++++++++++++++ .../p3-controlled-release-gate.sh | 17 +- 4 files changed, 591 insertions(+), 4 deletions(-) create mode 100644 ops/runner/awoooi-cd-lane-drain.service create mode 100755 scripts/reboot-recovery/awoooi-enforce-runner-failclosed-110.sh diff --git a/docs/LOGBOOK.md b/docs/LOGBOOK.md index 9c9d2598..2102dace 100644 --- a/docs/LOGBOOK.md +++ b/docs/LOGBOOK.md @@ -48509,3 +48509,14 @@ production browser smoke: - 沒有重啟 Docker / Nginx / firewall / K3s / DB。 - 沒有 force push、沒有讀 secret 明文、沒有讀 raw sessions / SQLite / auth / `.env`。 - 110 runner / cd-lane 自動恢復仍 blocked,下一步是 runner 搬遷或硬限流後再另開 controlled apply。 +## 2026-06-28 — 14:25 110 controlled drain enforcer source 化與 GitHub runner freeze + +**背景**:`cd.yaml #3811` / `code-review.yaml #3812` 重新排隊後,110 live `awoooi-runner-failclosed-enforcer` 仍以舊 live-only 腳本把 `awoooi-cd-lane-drain.service` 當成必殺 fail-closed 目標,導致 controlled drain 被 SIGKILL / mask;同時舊 GitHub Actions runner 服務仍 active,與 2026-06-28 GitHub freeze 衝突。 + +**變更**: +- Source:新增 `scripts/reboot-recovery/awoooi-enforce-runner-failclosed-110.sh`,將 live-only enforcer 納入 repo;legacy / direct / Gitea generic runner 與 `actions.runner.*` 一律停用,只有 `awoooi-cd-lane-drain.service` 在 sentinel、`capacity=1`、AWOOOI labels、ELF binary、systemd CPU / memory / tasks limits、root restore-source `0` 成立時保留為 `controlled_open`。 +- Source:新增 `ops/runner/awoooi-cd-lane-drain.service`,固定 `capacity=1` 專用 drain lane 的 systemd 限流與 rollback unit 來源。 +- Source:`scripts/reboot-recovery/p3-controlled-release-gate.sh` 將 `actions.runner.*` 判讀改成 GitHub disabled/fail-closed;active GitHub runner 不再因有 CPU / memory guardrail 就算 pass。 +- Live 110:安裝 repo 版 enforcer,從既有 quarantine opaque 
binary 恢復 `awoooi_cd_lane_controlled`,重開 `awoooi-cd-lane-drain.service`;讀回 `DRAIN_GUARD_MODE=controlled_open`、`DRAIN_LANE_PROCESS_COUNT=1`、`RUNNER_UNITS_BAD_COUNT=0`、legacy / GitHub runners masked/inactive、root restore-source `0`。deploy window 期間 enforcer timer 暫停,repo 版 enforcer 腳本留在 110 作為 readback / apply 來源,避免舊 live-only opener 再覆寫。 + +**邊界**:未讀 raw sessions、SQLite、auth、`.env`、runner token 或 `.runner` 內容;未重啟 host / Docker / Nginx / firewall / K3s / DB;未使用 GitHub API / gh / GitHub Actions;未把 host pressure gate 改成 warn-only。 diff --git a/ops/runner/awoooi-cd-lane-drain.service b/ops/runner/awoooi-cd-lane-drain.service new file mode 100644 index 00000000..13f2f67a --- /dev/null +++ b/ops/runner/awoooi-cd-lane-drain.service @@ -0,0 +1,25 @@ +[Unit] +Description=AWOOOI controlled CD lane drain +After=network-online.target docker.service +Wants=network-online.target +Requires=docker.service + +[Service] +Type=simple +User=wooo +WorkingDirectory=/home/wooo/awoooi-cd-lane-drain +Environment=HOME=/home/wooo +Environment=AWOOOI_CD_LANE_CONTROLLED=1 +ExecStart=/home/wooo/awoooi-cd-lane-drain/awoooi_cd_lane_controlled daemon --config /home/wooo/awoooi-cd-lane-drain/config.yaml +Restart=always +RestartSec=15 +KillSignal=SIGINT +TimeoutStopSec=3700 +CPUQuota=300% +MemoryHigh=8G +MemoryMax=10G +TasksMax=1024 +NoNewPrivileges=true + +[Install] +WantedBy=multi-user.target diff --git a/scripts/reboot-recovery/awoooi-enforce-runner-failclosed-110.sh b/scripts/reboot-recovery/awoooi-enforce-runner-failclosed-110.sh new file mode 100755 index 00000000..e73a476b --- /dev/null +++ b/scripts/reboot-recovery/awoooi-enforce-runner-failclosed-110.sh @@ -0,0 +1,542 @@ +#!/usr/bin/env bash +# AWOOOI 110 runner/CD lane fail-closed enforcer. +# The script does not read runner tokens or raw config contents. It only checks +# service state, process names, safe config predicates, filesystem object names, +# and binary kind. 
+ +set -uo pipefail + +MODE="check" +STAMP="$(date +%Y%m%dT%H%M%S%z)" +APPLY_PERFORMED=0 + +HARD_FAILCLOSED_UNITS=( + "awoooi-cd-lane.service" + "awoooi-direct-runner-open.service" + "awoooi-direct-runner.service" + "gitea-act-runner-host.service" + "gitea-act-runner-awoooi-controlled.service" + "gitea-awoooi-controlled-runner.service" + "gitea-act-runner-awoooi-open.service" +) + +DRAIN_UNIT="awoooi-cd-lane-drain.service" +DRAIN_DIR="/home/wooo/awoooi-cd-lane-drain" +DRAIN_BINARY="$DRAIN_DIR/awoooi_cd_lane_controlled" +DRAIN_CONFIG="$DRAIN_DIR/config.yaml" + +LEGACY_SENTINELS=( + "/run/awoooi-runner-host-enabled" + "/run/awoooi-start-controlled-cd-lane" + "/run/awoooi-start-cd-lane-allowed" + "/run/awoooi-cd-lane-ok" +) + +DRAIN_SENTINELS=( + "/run/awoooi-start-controlled-cd-lane-drain" + "/run/awoooi-cd-lane-drain-ok" + "/run/awoooi-cd-lane-enabled" + "/run/awoooi-cd-lane-controlled-open" +) + +OPENER_TEMPLATES=( + "/tmp/awoooi-startup-110.sh.codex-drain-available" + "/tmp/awoooi-startup-110.sh.codex-controlled" + "/tmp/awoooi-startup-110.sh.codex-controlled-open" +) + +usage() { + cat <<'USAGE' +Usage: awoooi-enforce-runner-failclosed-110.sh [--check|--apply] + +--check Read-only status check. Exit non-zero if unsafe runner/CD lane state exists. +--apply Stop/mask legacy runners and preserve only a validated controlled drain lane. +USAGE +} + +while [ "$#" -gt 0 ]; do + case "$1" in + --check) + MODE="check" + ;; + --apply) + MODE="apply" + ;; + -h|--help) + usage + exit 0 + ;; + *) + echo "unknown argument: $1" >&2 + usage >&2 + exit 64 + ;; + esac + shift +done + +as_root() { + if [ "${EUID:-$(id -u)}" -eq 0 ]; then + "$@" + else + sudo -n "$@" + fi +} + +host_is_110() { + if command -v ip >/dev/null 2>&1; then + ip -o -4 addr show 2>/dev/null | awk '{print $4}' | grep -q '^192\.168\.0\.110/' + return $? 
+ fi + hostname -I 2>/dev/null | tr ' ' '\n' | grep -qx '192.168.0.110' +} + +systemd_value() { + local unit="$1" + local prop="$2" + systemctl show "$unit" -p "$prop" --value 2>/dev/null || true +} + +unit_failclosed_ok() { + local unit="$1" + local load active unitfile mainpid + load="$(systemd_value "$unit" LoadState)" + active="$(systemd_value "$unit" ActiveState)" + unitfile="$(systemd_value "$unit" UnitFileState)" + mainpid="$(systemd_value "$unit" MainPID)" + { [ "$active" = "inactive" ] || [ "$active" = "failed" ] || [ "$active" = "unknown" ] || [ -z "$active" ]; } || return 1 + { [ "$load" = "masked" ] || [ "$load" = "not-found" ] || [ "$unitfile" = "masked" ]; } || return 1 + [ "${mainpid:-0}" = "0" ] || return 1 +} + +count_active_job_containers() { + if ! command -v docker >/dev/null 2>&1; then + echo 0 + return + fi + docker ps --format '{{.Names}}' 2>/dev/null | grep -Ec '^(GITEA-ACTIONS-|awoooi-cd-)' || true +} + +stop_active_job_containers() { + local name + command -v docker >/dev/null 2>&1 || return 0 + while IFS= read -r name; do + [ -n "$name" ] || continue + docker stop -t 20 "$name" >/dev/null 2>&1 || true + done < <(docker ps --format '{{.Names}}' 2>/dev/null | grep -E '^(GITEA-ACTIONS-|awoooi-cd-)' || true) +} + +job_container_guard_ok() { + local count mode + count="$(count_active_job_containers)" + mode="$(drain_guard_mode)" + [ "$count" = "0" ] && return 0 + if [ "$mode" = "controlled_open" ] && [ "$count" -le 1 ] 2>/dev/null; then + return 0 + fi + return 1 +} + +count_regular_lane_processes() { + pgrep -f '^/home/wooo/awoooi-cd-lane/awoooi_cd_lane' 2>/dev/null | wc -l | tr -d ' ' +} + +count_drain_lane_processes() { + pgrep -f "^${DRAIN_BINARY}" 2>/dev/null | wc -l | tr -d ' ' +} + +count_runner_processes() { + pgrep -f '^/home/wooo/act-runner/act_runner|^/home/wooo/act-runner-controlled/act_runner|^/home/wooo/awoooi-controlled-runner/awoooi_controlled_runner' 2>/dev/null | wc -l | tr -d ' ' +} + +count_action_runner_processes() { + 
pgrep -f '^/home/wooo/actions-runner[^/]*/bin/Runner\.(Listener|Worker)' 2>/dev/null | wc -l | tr -d ' ' +} + +list_action_runner_units() { + { + systemctl list-unit-files 'actions.runner.*' --no-legend --plain 2>/dev/null | awk '{print $1}' + systemctl list-units 'actions.runner.*' --all --no-legend --plain 2>/dev/null | awk '{print $1}' + } | awk 'NF' | sort -u +} + +stop_and_mask_unit() { + local unit="$1" + as_root systemctl kill --signal=SIGKILL "$unit" >/dev/null 2>&1 || true + as_root systemctl stop "$unit" >/dev/null 2>&1 || true + as_root systemctl reset-failed "$unit" >/dev/null 2>&1 || true + as_root systemctl disable "$unit" >/dev/null 2>&1 || true + as_root systemctl mask "$unit" >/dev/null 2>&1 || mask_unit_file_to_devnull "$unit" + mask_unit_file_to_devnull "$unit" +} + +stop_and_mask_hard_units() { + local unit + for unit in "${HARD_FAILCLOSED_UNITS[@]}"; do + stop_and_mask_unit "$unit" + done +} + +stop_and_mask_action_runner_units() { + local unit + while IFS= read -r unit; do + [ -n "$unit" ] || continue + stop_and_mask_unit "$unit" + done < <(list_action_runner_units) +} + +kill_runner_processes() { + pkill -KILL -f '^/home/wooo/awoooi-cd-lane/awoooi_cd_lane' >/dev/null 2>&1 || true + pkill -KILL -f '^/home/wooo/act-runner/act_runner' >/dev/null 2>&1 || true + pkill -KILL -f '^/home/wooo/act-runner-controlled/act_runner' >/dev/null 2>&1 || true + pkill -KILL -f '^/home/wooo/awoooi-controlled-runner/awoooi_controlled_runner' >/dev/null 2>&1 || true + pkill -KILL -f '^/home/wooo/actions-runner[^/]*/bin/Runner\.(Listener|Worker)' >/dev/null 2>&1 || true +} + +remove_legacy_sentinels() { + local path + for path in "${LEGACY_SENTINELS[@]}"; do + as_root rm -f "$path" >/dev/null 2>&1 || true + done +} + +remove_drain_sentinels() { + local path + for path in "${DRAIN_SENTINELS[@]}"; do + as_root rm -f "$path" >/dev/null 2>&1 || true + done +} + +mask_unit_file_to_devnull() { + local unit="$1" + local path="/etc/systemd/system/$unit" + as_root chattr -i 
"$path" >/dev/null 2>&1 || true + if [ -e "$path" ] || [ -L "$path" ]; then + if ! { [ -L "$path" ] && [ "$(readlink "$path" 2>/dev/null || true)" = "/dev/null" ]; }; then + as_root mv "$path" "${path}.sealed-${STAMP}" >/dev/null 2>&1 || true + fi + fi + as_root ln -sfn /dev/null "$path" >/dev/null 2>&1 || true + as_root systemctl mask "$unit" >/dev/null 2>&1 || true +} + +write_failclosed_stub() { + local path="$1" + local tmp + tmp="$(mktemp)" + cat >"$tmp" <<'EOF' +#!/usr/bin/env bash +set -eu +echo "AWOOOI 110 runner/CD lane is fail-closed after the 2026-06-28 pressure incident; migrate or hard-rate-limit before enabling." >&2 +exit 75 +EOF + as_root chattr -i "$path" "$(dirname "$path")" >/dev/null 2>&1 || true + as_root install -o root -g root -m 0755 "$tmp" "$path" >/dev/null 2>&1 || true + rm -f "$tmp" + as_root chattr +i "$path" >/dev/null 2>&1 || true +} + +seal_quarantined_runner_sources() { + local path + while IFS= read -r -d '' path; do + [ -e "$path" ] || continue + write_failclosed_stub "$path" + done < <( + find /home/wooo -maxdepth 4 -type f \( \ + -name 'act_runner.quarantined-*' -o \ + -name 'act_runner.real-*.quarantined-*' \ + \) -print0 2>/dev/null || true + ) +} + +seal_opener_templates() { + local path + local tmp + tmp="$(mktemp)" + cat >"$tmp" <<'EOF' +#!/usr/bin/env bash +set -eu +if [ -x /usr/local/bin/awoooi-enforce-runner-failclosed-110.sh ]; then + exec /usr/local/bin/awoooi-enforce-runner-failclosed-110.sh --apply +fi +echo "AWOOOI 110 startup opener template is sealed fail-closed." 
>&2 +exit 0 +EOF + for path in "${OPENER_TEMPLATES[@]}"; do + as_root chattr -i "$path" >/dev/null 2>&1 || true + as_root install -o root -g root -m 0755 "$tmp" "$path" >/dev/null 2>&1 || true + done + rm -f "$tmp" +} + +seal_root_restore_sources() { + local path + local final_root="/root/awoooi-runner-restore-sources-sealed-${STAMP}" + local target_root="$final_root/root" + local moved=0 + + while IFS= read -r -d '' path; do + [ -d "$path" ] || continue + if [ "$moved" -eq 0 ]; then + as_root mkdir -p "$target_root" >/dev/null 2>&1 || true + moved=1 + fi + as_root chattr -R -i "$path" >/dev/null 2>&1 || true + as_root mv "$path" "$target_root/" >/dev/null 2>&1 || true + done < <( + as_root find /root -maxdepth 1 -type d \( \ + -name 'awoooi-runner-restore-sources-disabled*' -o \ + -name 'awoooi-cd-lane-disabled*' -o \ + -name 'awoooi-cd-lane-drain-disabled*' \ + \) -print0 2>/dev/null || true + ) +} + +root_restore_sources_left() { + as_root find /root -maxdepth 1 -type d \( \ + -name 'awoooi-runner-restore-sources-disabled*' -o \ + -name 'awoooi-cd-lane-disabled*' -o \ + -name 'awoooi-cd-lane-drain-disabled*' \ + \) -print 2>/dev/null | wc -l | tr -d ' ' +} + +drain_sentinel_present() { + local path + for path in "${DRAIN_SENTINELS[@]}"; do + [ -e "$path" ] && return 0 + done + return 1 +} + +drain_capacity_ok() { + grep -Eq '^[[:space:]]+capacity:[[:space:]]*1[[:space:]]*$' "$DRAIN_CONFIG" 2>/dev/null +} + +drain_labels_ok() { + grep -q 'awoooi-ubuntu:docker://192.168.0.110:5000/awoooi/ci-runner:act-22.04' "$DRAIN_CONFIG" 2>/dev/null \ + && grep -q 'awoooi-host:host' "$DRAIN_CONFIG" 2>/dev/null \ + && ! 
grep -Eq '^[[:space:]]+- ".*(ubuntu-latest|stockplatform|headless|playwright)' "$DRAIN_CONFIG" 2>/dev/null +} + +drain_binary_elf() { + file -b "$DRAIN_BINARY" 2>/dev/null | grep -qi 'ELF' +} + +drain_limits_ok() { + local cpu_accounting cpu_quota memory_accounting memory_max tasks_accounting tasks_max + cpu_accounting="$(systemd_value "$DRAIN_UNIT" CPUAccounting)" + cpu_quota="$(systemd_value "$DRAIN_UNIT" CPUQuotaPerSecUSec)" + memory_accounting="$(systemd_value "$DRAIN_UNIT" MemoryAccounting)" + memory_max="$(systemd_value "$DRAIN_UNIT" MemoryMax)" + tasks_accounting="$(systemd_value "$DRAIN_UNIT" TasksAccounting)" + tasks_max="$(systemd_value "$DRAIN_UNIT" TasksMax)" + [ "$cpu_accounting" = "yes" ] \ + && [ -n "$cpu_quota" ] && [ "$cpu_quota" != "infinity" ] \ + && [ "$memory_accounting" = "yes" ] \ + && [ -n "$memory_max" ] && [ "$memory_max" != "infinity" ] \ + && [ "$tasks_accounting" = "yes" ] \ + && [ -n "$tasks_max" ] && [ "$tasks_max" != "infinity" ] +} + +drain_controlled_preserve_ok() { + drain_sentinel_present \ + && drain_capacity_ok \ + && drain_labels_ok \ + && drain_binary_elf \ + && [ "$(root_restore_sources_left)" = "0" ] +} + +drain_guard_ok() { + local active mainpid process_count + active="$(systemd_value "$DRAIN_UNIT" ActiveState)" + mainpid="$(systemd_value "$DRAIN_UNIT" MainPID)" + process_count="$(count_drain_lane_processes)" + + if [ "$active" = "active" ] \ + && [ "${mainpid:-0}" != "0" ] \ + && [ "$process_count" -ge 1 ] \ + && drain_controlled_preserve_ok \ + && drain_limits_ok; then + return 0 + fi + + if drain_controlled_preserve_ok && drain_limits_ok; then + return 0 + fi + + if unit_failclosed_ok "$DRAIN_UNIT" && [ "$process_count" = "0" ]; then + return 0 + fi + + return 1 +} + +drain_guard_mode() { + local active mainpid process_count + active="$(systemd_value "$DRAIN_UNIT" ActiveState)" + mainpid="$(systemd_value "$DRAIN_UNIT" MainPID)" + process_count="$(count_drain_lane_processes)" + if [ "$active" = "active" ] \ + && [ 
"${mainpid:-0}" != "0" ] \ + && [ "$process_count" -ge 1 ] \ + && drain_controlled_preserve_ok \ + && drain_limits_ok; then + echo "controlled_open" + elif drain_controlled_preserve_ok && drain_limits_ok; then # same predicate drain_guard_ok uses, so mode and RUNNER_UNITS_BAD_COUNT cannot disagree + echo "controlled_ready" + elif unit_failclosed_ok "$DRAIN_UNIT" && [ "$process_count" = "0" ]; then + echo "failclosed" + else + echo "blocked" + fi +} + +hard_units_bad_count() { + local unit bad=0 + for unit in "${HARD_FAILCLOSED_UNITS[@]}"; do + unit_failclosed_ok "$unit" || bad=$((bad + 1)) + done + echo "$bad" +} + +action_runner_bad_count() { + local unit bad=0 + while IFS= read -r unit; do + [ -n "$unit" ] || continue + unit_failclosed_ok "$unit" || bad=$((bad + 1)) + done < <(list_action_runner_units) + echo "$bad" +} + +runner_units_bad_count() { + local bad + bad="$(hard_units_bad_count)" + drain_guard_ok || bad=$((bad + 1)) + bad=$((bad + $(action_runner_bad_count))) + echo "$bad" +} + +write_metrics() { + local dir="$1" + local tmp + [ -d "$dir" ] || return 0 + tmp="$(mktemp)" + cat >"$tmp" </dev/null 2>&1 || true + rm -f "$tmp" +} + +print_unit_readback() { + local unit="$1" + local load active unitfile mainpid + load="$(systemd_value "$unit" LoadState)" + active="$(systemd_value "$unit" ActiveState)" + unitfile="$(systemd_value "$unit" UnitFileState)" + mainpid="$(systemd_value "$unit" MainPID)" + echo "RUNNER_UNIT $unit load=${load:-unknown} active=${active:-unknown} unitfile=${unitfile:-unknown} mainpid=${mainpid:-unknown}" +} + +print_readback() { + local unit + echo "ENFORCER_MODE=$MODE" + echo "ENFORCER_HOST_110=1" + echo "APPLY_PERFORMED=$APPLY_PERFORMED" + echo "ACTIVE_JOB_CONTAINERS=$(count_active_job_containers)" + echo "REGULAR_LANE_PROCESS_COUNT=$(count_regular_lane_processes)" + echo "DRAIN_LANE_PROCESS_COUNT=$(count_drain_lane_processes)" + echo "RUNNER_PROCESS_COUNT=$(count_runner_processes)" + echo "ACTION_RUNNER_PROCESS_COUNT=$(count_action_runner_processes)" + echo "ROOT_RESTORE_SOURCES_LEFT=$(root_restore_sources_left)" + echo 
"DRAIN_GUARD_MODE=$(drain_guard_mode)" + echo "JOB_CONTAINER_GUARD_OK=$({ job_container_guard_ok && echo 1; } || echo 0)" + echo "DRAIN_CAPACITY_OK=$({ drain_capacity_ok && echo 1; } || echo 0)" + echo "DRAIN_LABELS_OK=$({ drain_labels_ok && echo 1; } || echo 0)" + echo "DRAIN_BINARY_ELF=$({ drain_binary_elf && echo 1; } || echo 0)" + echo "DRAIN_LIMITS_OK=$({ drain_limits_ok && echo 1; } || echo 0)" + echo "RUNNER_UNITS_BAD_COUNT=$(runner_units_bad_count)" + for unit in "${HARD_FAILCLOSED_UNITS[@]}"; do + print_unit_readback "$unit" + done + print_unit_readback "$DRAIN_UNIT" + while IFS= read -r unit; do + [ -n "$unit" ] || continue + print_unit_readback "$unit" + done < <(list_action_runner_units) +} + +apply_failclosed() { + local preserve_drain=0 + APPLY_PERFORMED=1 + drain_controlled_preserve_ok && drain_limits_ok && preserve_drain=1 # preserve only a lane that also carries systemd CPU/memory/tasks limits; otherwise apply would keep a lane that drain_guard_ok rejects + + if [ "$preserve_drain" = "1" ] && [ "$(count_drain_lane_processes)" != "0" ]; then + : + else + stop_active_job_containers + fi + stop_and_mask_hard_units + stop_and_mask_action_runner_units + kill_runner_processes + remove_legacy_sentinels + if [ "$preserve_drain" = "1" ]; then + : + else + stop_and_mask_unit "$DRAIN_UNIT" + pkill -KILL -f "^${DRAIN_BINARY}" >/dev/null 2>&1 || true + remove_drain_sentinels + fi + seal_opener_templates + seal_root_restore_sources + seal_quarantined_runner_sources + as_root systemctl daemon-reload >/dev/null 2>&1 || true +} + +if ! host_is_110 && [ "${AWOOOI_FAILCLOSED_ALLOW_NON_110:-0}" != "1" ]; then + echo "ENFORCER_HOST_110=0" + echo "Refusing to enforce: host is not 192.168.0.110. Set AWOOOI_FAILCLOSED_ALLOW_NON_110=1 only for controlled tests."
>&2 + exit 65 +fi + +if [ "$MODE" = "apply" ]; then + apply_failclosed +fi + +write_metrics "/var/lib/node_exporter/textfile_collector" +write_metrics "/home/wooo/node_exporter_textfiles" +print_readback + +if job_container_guard_ok \ + && [ "$(count_regular_lane_processes)" = "0" ] \ + && [ "$(count_runner_processes)" = "0" ] \ + && [ "$(count_action_runner_processes)" = "0" ] \ + && [ "$(root_restore_sources_left)" = "0" ] \ + && [ "$(runner_units_bad_count)" = "0" ]; then + exit 0 +fi + +exit 2 diff --git a/scripts/reboot-recovery/p3-controlled-release-gate.sh b/scripts/reboot-recovery/p3-controlled-release-gate.sh index 87b8cad9..8ba61836 100755 --- a/scripts/reboot-recovery/p3-controlled-release-gate.sh +++ b/scripts/reboot-recovery/p3-controlled-release-gate.sh @@ -420,14 +420,23 @@ for p in /home/wooo/act-runner/act_runner /home/wooo/act-runner/act_runner.real- echo "$kind" | grep -qi "ELF" && bad=1 done for u in $(systemctl list-units "actions.runner.*" --all --no-legend --plain 2>/dev/null | awk "{print \$1}"); do + load=$(systemctl show "$u" -p LoadState --value) + unitfile=$(systemctl show "$u" -p UnitFileState --value) + mainpid=$(systemctl show "$u" -p MainPID --value) watchdog=$(systemctl show "$u" -p WatchdogUSec --value) quota=$(systemctl show "$u" -p CPUQuotaPerSecUSec --value) memory=$(systemctl show "$u" -p MemoryMax --value) state=$(systemctl show "$u" -p ActiveState --value) - echo "$u watchdog=$watchdog quota=$quota memory=$memory state=$state" - [ "$watchdog" = "0" ] || bad=1 - [ "$quota" != "infinity" ] && [ "$quota" != "0" ] || bad=1 - [ "$memory" != "infinity" ] && [ "$memory" != "0" ] || bad=1 + action_ok=0 + action_mode=blocked + if [ "$state" != "active" ] \ + && { [ "$load" = "masked" ] || [ "$load" = "not-found" ] || [ "$unitfile" = "masked" ] || [ "$unitfile" = "disabled" ]; } \ + && [ "${mainpid:-0}" = "0" ]; then + action_ok=1 + action_mode=github_disabled + fi + echo "$u mode=$action_mode load=$load unitfile=$unitfile state=$state 
mainpid=$mainpid watchdog=$watchdog quota=$quota memory=$memory ok=$action_ok" + [ "$action_ok" = "1" ] || bad=1 done echo "BAD_RUNNER_GUARDRAILS $bad" ' 2>&1); then