fix(runner): verify 110 controlled cd lane readiness
Some checks failed
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Failing after 3m10s
CD Pipeline / build-and-deploy (push) Has been skipped
AWOOOI Harbor 110 Local Repair / workflow-shape (push) Successful in 1s
CD Pipeline / post-deploy-checks (push) Has been skipped
AWOOOI Harbor 110 Local Repair / harbor-110-local-repair (push) Has been cancelled

This commit is contained in:
Your Name
2026-06-30 21:37:24 +08:00
parent b932ae2174
commit f1758cdf0c
8 changed files with 754 additions and 1 deletions

View File

@@ -50664,3 +50664,26 @@ production browser smoke:
**下一步**
- commit/push 後等待新的 Gitea CD run,deploy marker 更新後讀回 `delivery-closure-workbench`、`awoooi-priority-work-order-readback`、`stockplatform-public-api-controlled-recovery-preflight`。
## 2026-06-30 — 21:42 110 controlled `awoooi-host` lane readiness verifier
**完成內容**
- 新增 `ops/runner/check-awoooi-110-controlled-cd-lane-readiness.sh`,把目前 Gitea `harbor-110-local-repair` 顯示 `No matching online runner with label: awoooi-host` 的主線 blocker 收斂成可重跑 verifier。
- verifier 僅讀 metadata,不讀 `.runner` 內容、不印 runner token;檢查 110 host selector、controlled drain lane `capacity=1`、`awoooi-host:host` / `awoooi-ubuntu` labels、ELF binary、registration metadata 存在、systemd CPU / memory / tasks / `NoNewPrivileges` guardrails、legacy runner fail-closed、root restore-source left `0`、active action container / heavy process / load 壓力。
- `awoooi-cd-lane-drain.service` 與 `awoooi-startup-110.sh` 產生的 controlled drain unit 新增 `ConditionPathExists=/home/wooo/awoooi-cd-lane-drain/data/.runner`,避免 service active 但未註冊時假裝可承接 `awoooi-host` queue。
- `ops/runner/verify-awoooi-non110-cd-closure.py` 的 Harbor 110 no-matching next action 改為先在 110 跑 `check-awoooi-110-controlled-cd-lane-readiness.sh`,通過後再恢復 `awoooi-host` control path 並重讀 queue/closure。
**本地驗證結果**
- `pytest ops/runner/test_check_awoooi_110_controlled_cd_lane_readiness.py ops/runner/test_verify_awoooi_non110_cd_closure.py ops/runner/test_cd_controlled_runtime_profile.py ops/runner/test_guard_gitea_runner_pressure.py scripts/reboot-recovery/tests/test_cold_start_monitor_bounded_probes.py -q`:`56 passed`。
- `python3.11 -m ruff check ...`:通過。
- `bash -n ops/runner/check-awoooi-110-controlled-cd-lane-readiness.sh scripts/reboot-recovery/awoooi-startup-110.sh`:通過。
- `python3.11 ops/runner/guard-gitea-runner-pressure.py --root .`:通過,`auto_branch_events_on_110=0`、`generic_runner_labels=0`。
- `node scripts/ci/check-gitea-step-env-secrets.js`:通過。
**仍維持**
- 沒有讀 secret / token / `.env` / raw sessions / SQLite / auth,沒有讀 `.runner` 內容。
- 沒有使用 GitHub / gh / GitHub API / GitHub Actions。
- 沒有重啟主機,沒有 Docker / Nginx / K3s / DB restart,沒有 workflow_dispatch,沒有 runtime write。
**下一步**
- commit/push 後讀回 Gitea queue / non110 CD closure / registry;若仍是 `awoooi-host` no-matching,下一個 controlled apply target 是在 110 上跑此 verifier,依 safe_next_step 恢復 controlled drain lane 或補 registration metadata,再重讀 Harbor repair queue。

View File

@@ -3,6 +3,7 @@ Description=AWOOOI controlled CD lane drain
After=network-online.target docker.service
Wants=network-online.target
Requires=docker.service
ConditionPathExists=/home/wooo/awoooi-cd-lane-drain/data/.runner
[Service]
Type=simple

View File

@@ -0,0 +1,453 @@
#!/usr/bin/env bash
# Abort on command failure, unset variables, and failures anywhere in a pipeline.
set -euo pipefail
# Read-only verifier for the 110 controlled AWOOOI CD lane. It only prints
# metadata needed to decide whether the awoooi-host queue can be served safely.
# It never reads or prints runner registration contents.

# --- Tunables: every value below can be overridden from the environment. ---
# IPv4 address that must be present on this host for the check to apply.
TARGET_HOST_IP="${TARGET_HOST_IP:-192.168.0.110}"
# Install directory, systemd unit, binary and config of the controlled drain lane.
CD_LANE_DRAIN_DIR="${CD_LANE_DRAIN_DIR:-/home/wooo/awoooi-cd-lane-drain}"
CD_LANE_DRAIN_SERVICE="${CD_LANE_DRAIN_SERVICE:-awoooi-cd-lane-drain.service}"
CD_LANE_DRAIN_BINARY="${CD_LANE_DRAIN_BINARY:-${CD_LANE_DRAIN_DIR}/awoooi_cd_lane_controlled}"
CD_LANE_DRAIN_CONFIG="${CD_LANE_DRAIN_CONFIG:-${CD_LANE_DRAIN_DIR}/config.yaml}"
# Space-separated candidate registration files; only existence/size/mode are inspected.
CD_LANE_DRAIN_REGISTRATION_PATHS="${CD_LANE_DRAIN_REGISTRATION_PATHS:-${CD_LANE_DRAIN_DIR}/data/.runner ${CD_LANE_DRAIN_DIR}/.runner}"
# Primary lane unit that is expected to be fail-closed (masked or absent).
PRIMARY_CD_LANE_SERVICE="${PRIMARY_CD_LANE_SERVICE:-awoooi-cd-lane.service}"
# Directory scanned (depth 1) for leftover "*-disabled-*" restore-source directories.
ROOT_RESTORE_PARENT="${ROOT_RESTORE_PARENT:-/root}"
# Upper bounds; exceeding any of them registers a blocker.
MAX_CAPACITY="${MAX_CAPACITY:-1}"
MAX_HEAVY_PROCESS_COUNT="${MAX_HEAVY_PROCESS_COUNT:-0}"
MAX_ACTIVE_ACTION_CONTAINERS="${MAX_ACTIVE_ACTION_CONTAINERS:-0}"
MAX_LOAD_PER_CORE="${MAX_LOAD_PER_CORE:-1.25}"
# "1" enforces the corresponding requirement; any other value relaxes it.
REQUIRE_ACTIVE_SERVICE="${REQUIRE_ACTIVE_SERVICE:-1}"
REQUIRE_PRIMARY_LANE_FAILCLOSED="${REQUIRE_PRIMARY_LANE_FAILCLOSED:-1}"
# Space-separated legacy runner units / binaries that must stay fail-closed.
LEGACY_RUNNER_SERVICE_NAMES="${LEGACY_RUNNER_SERVICE_NAMES:-awoooi-direct-runner-open.service awoooi-direct-runner.service gitea-act-runner-host.service gitea-act-runner-awoooi-controlled.service gitea-awoooi-controlled-runner.service gitea-act-runner-awoooi-open.service}"
LEGACY_RUNNER_BINARY_PATHS="${LEGACY_RUNNER_BINARY_PATHS:-/home/wooo/act-runner/act_runner /home/wooo/act-runner/act_runner.real-20260628-runner-pressure-guard /home/wooo/act-runner-controlled/act_runner /home/wooo/awoooi-controlled-runner/awoooi_controlled_runner}"
# ERE matched against the label name (the part before the first ':').
FORBIDDEN_LABEL_RE="${FORBIDDEN_LABEL_RE:-^(ubuntu-latest|ubuntu-[0-9].*|self-hosted|stockplatform.*|stock-platform.*|headless.*|playwright.*)$}"

# --- Mutable state filled in by the check_* functions and read by print_verdict. ---
BLOCKERS=()
WARNINGS=()
CONFIG_READY=0
BINARY_READY=0
REGISTRATION_READY=0
SERVICE_READY=0
LEGACY_FAILCLOSED=0
PRIMARY_LANE_FAILCLOSED=0
# Stays "unknown" until check_restore_sources has counted the leftovers.
ROOT_RESTORE_LEFT="unknown"
# Print a blank line followed by a "== <title> ==" banner that separates report sections.
section() {
  local title="$1"
  printf '\n== %s ==\n' "$title"
}
# Record a readiness blocker in the global BLOCKERS array and echo it as "BLOCKER <id>".
# Must be called in the current shell (not inside $(...)) for the array update to persist.
blocker() {
  local reason="$1"
  BLOCKERS+=("$reason")
  printf 'BLOCKER %s\n' "$reason"
}
# Record a non-blocking finding in the global WARNINGS array and echo it as "WARNING <id>".
warning() {
  local note="$1"
  WARNINGS+=("$note")
  printf 'WARNING %s\n' "$note"
}
# Succeed when the named command resolves via `command -v`; prints nothing.
command_exists() {
  local tool="$1"
  command -v "$tool" >/dev/null 2>&1
}
# Print the host's addresses, one per line, sorted and de-duplicated.
# Uses `ip -o -4 addr show` when `ip` is available (prefix length stripped);
# otherwise falls back to `hostname -I` on a best-effort basis.
host_ips() {
  if ! command_exists ip; then
    hostname -I 2>/dev/null | tr ' ' '\n' | awk 'NF' | sort -u || true
    return 0
  fi
  # Field 4 of the one-line format is "<address>/<prefix>"; keep only the address.
  ip -o -4 addr show 2>/dev/null | awk '{sub(/\/.*/, "", $4); print $4}' | sort -u
  return 0
}
# Succeed when the given address is exactly one of the addresses printed by host_ips.
#
# The address is compared as a fixed string against whole lines (grep -Fx), so
# the dots in an IPv4 address are not treated as regex wildcards. The address
# list is captured first and fed through a here-string: piping straight into
# `grep -q` under `set -o pipefail` can report failure on a real match when
# grep exits early and the producer receives SIGPIPE.
#
# Arguments: $1 - address to look for. An empty address never matches.
host_has_ip() {
  local ip="$1"
  local known_ips
  [ -n "$ip" ] || return 1
  known_ips="$(host_ips)" || true
  grep -Fqx -- "$ip" <<<"$known_ips"
}
# Print the unit file text reported by `systemctl cat`.
# Returns 1 (printing nothing) when systemctl cannot show the unit.
systemd_cat() {
  local unit="$1"
  # Probe quietly first so a missing unit produces no partial output.
  systemctl cat "$unit" >/dev/null 2>&1 || return 1
  systemctl cat "$unit" 2>/dev/null
  return 0
}
# Print LoadState, ActiveState, UnitFileState and MainPID for a unit as KEY=VALUE lines.
# Never fails: a systemctl error yields whatever was printed (possibly nothing).
systemd_show() {
  local unit="$1"
  local -a properties=(
    -p LoadState
    -p ActiveState
    -p UnitFileState
    -p MainPID
  )
  systemctl show "$unit" "${properties[@]}" --no-pager 2>/dev/null || true
}
# Print the value of one property from systemd_show output.
# Prints an empty line when the property is not present.
systemd_value() {
  local unit="$1" key="$2"
  systemd_show "$unit" | awk -F= -v wanted="$key" '
    $1 == wanted { print $2; hits++ }
    END { if (hits == 0) print "" }
  '
}
# Succeed when a unit cannot run: it is either not found, or masked (both
# LoadState and UnitFileState), not active, and has MainPID 0.
# Always prints one FAILCLOSED_UNIT readback line; empty properties print as "unknown".
unit_failclosed_or_absent() {
  local unit="$1"
  local props load unitfile active mainpid
  # One KEY=VALUE token per line; spaces are treated as separators as well.
  props="$(systemd_show "$unit" | tr ' ' '\n')"
  load="$(awk -F= '$1 == "LoadState" {print $2; exit}' <<<"$props")"
  unitfile="$(awk -F= '$1 == "UnitFileState" {print $2; exit}' <<<"$props")"
  active="$(awk -F= '$1 == "ActiveState" {print $2; exit}' <<<"$props")"
  mainpid="$(awk -F= '$1 == "MainPID" {print $2; exit}' <<<"$props")"
  printf 'FAILCLOSED_UNIT unit=%s load=%s unitfile=%s active=%s mainpid=%s\n' \
    "$unit" "${load:-unknown}" "${unitfile:-unknown}" "${active:-unknown}" "${mainpid:-unknown}"
  case "${load:-}" in
    not-found)
      return 0
      ;;
    masked)
      if [ "${unitfile:-}" = "masked" ] && [ "${active:-}" != "active" ] && [ "${mainpid:-0}" = "0" ]; then
        return 0
      fi
      ;;
  esac
  return 1
}
# Print the `capacity:` value found inside the top-level `runner:` section of
# a YAML config, or nothing when the section or key is absent.
#
# This is a line-oriented scan, not a YAML parser. The value is normalised so
# that valid YAML spellings do not produce a false "capacity missing" verdict:
# a trailing " # comment", trailing whitespace (including CR) and surrounding
# quotes are removed. Comment lines at column 0 do not end the runner section.
#
# Arguments: $1 - path to the config file.
extract_runner_capacity() {
  local config_path="$1"
  awk '
    /^runner:[[:space:]]*(#.*)?$/ {
      in_runner=1
      next
    }
    # Any other key starting at column 0 closes the runner section.
    in_runner && /^[^[:space:]#]/ {
      in_runner=0
    }
    in_runner && /^[[:space:]]*capacity:[[:space:]]*/ {
      line=$0
      sub(/^[[:space:]]*capacity:[[:space:]]*/, "", line)
      sub(/[[:space:]]+#.*$/, "", line)
      gsub(/["'\'']/, "", line)
      sub(/[[:space:]]+$/, "", line)
      print line
      exit
    }
  ' "$config_path"
}
# Print the items of a block-style `labels:` list from a YAML config, one per line.
#
# This is a line-oriented scan, not a YAML parser. Only the block form
# ("labels:" followed by "- item" lines) is recognised. Each item has a
# trailing " # comment", trailing whitespace and one surrounding quote
# character (double or single) removed. Blank lines and comment lines inside
# the list are skipped; any other line ends the list, so "- item" entries that
# belong to a later sibling key are not reported as labels.
#
# Arguments: $1 - path to the config file.
extract_runner_labels() {
  local config_path="$1"
  awk '
    /^[[:space:]]*labels:[[:space:]]*(#.*)?$/ {
      in_labels=1
      next
    }
    in_labels && /^[[:space:]]*(#.*)?$/ {
      next
    }
    in_labels && /^[[:space:]]*-[[:space:]]*/ {
      line=$0
      sub(/^[[:space:]]*-[[:space:]]*/, "", line)
      sub(/[[:space:]]+#.*$/, "", line)
      sub(/[[:space:]]+$/, "", line)
      sub(/^["'\'']/, "", line)
      sub(/["'\'']$/, "", line)
      print line
      next
    }
    in_labels {
      in_labels=0
    }
  ' "$config_path"
}
# Print the label name: everything before the first ':' (the whole label when there is none).
label_name() {
  local label="$1"
  printf '%s' "${label%%:*}"
}
# Print how many running containers have a name starting with "GITEA-ACTIONS-TASK-".
# Prints 0 when docker is not installed.
active_action_container_count() {
  command_exists docker || {
    echo 0
    return 0
  }
  # grep -c still prints the count when nothing matches but exits 1; `|| true` absorbs that.
  docker ps --format '{{.Names}}' 2>/dev/null | grep -Ec '^GITEA-ACTIONS-TASK-' || true
}
# Print the number of distinct PIDs whose command line matches either the
# browser pattern or the build/smoke-test pattern passed to `pgrep -f`.
heavy_process_count() {
  local pids
  pids="$(
    pgrep -f '(^|/)(chrome|chromium|chromium-browser)( |$)' 2>/dev/null || true
    pgrep -f 'playwright|stockplatform.*smoke|next build|turbo build|vite build' 2>/dev/null || true
  )"
  # A PID matched by both patterns is counted once.
  printf '%s' "$pids" | sort -u | wc -l | tr -d ' '
}
# Succeed when the 1-minute load average divided by the online CPU count is at
# most MAX_LOAD_PER_CORE. Prints a LOAD_READBACK line with the inputs.
# When /proc/loadavg is unreadable or awk is missing, records a warning and succeeds.
load_per_core_ok() {
  local cpu_count one_minute per_core
  if ! { [ -r /proc/loadavg ] && command_exists awk; }; then
    warning "loadavg_unavailable"
    return 0
  fi
  cpu_count="$(getconf _NPROCESSORS_ONLN 2>/dev/null || echo 1)"
  one_minute="$(awk '{print $1}' /proc/loadavg)"
  # Guard against a zero/negative core count before dividing.
  per_core="$(awk -v load_value="$one_minute" -v cores="${cpu_count:-1}" 'BEGIN { if (cores < 1) cores=1; printf "%.6f", load_value / cores }')"
  printf 'LOAD_READBACK load1=%s cores=%s load_per_core=%s max=%s\n' "$one_minute" "$cpu_count" "$per_core" "$MAX_LOAD_PER_CORE"
  awk -v ratio="$per_core" -v max="$MAX_LOAD_PER_CORE" 'BEGIN { exit !(ratio <= max) }'
}
# Report the target IP and the host's addresses, and register a blocker when a
# non-empty TARGET_HOST_IP is not among them.
check_host_selector() {
  local joined_ips
  section "host selector"
  printf 'target_host_ip=%s\n' "$TARGET_HOST_IP"
  joined_ips="$(host_ips | paste -sd, -)" || true
  printf 'host_ips=%s\n' "$joined_ips"
  # An empty TARGET_HOST_IP disables the selector.
  [ -n "$TARGET_HOST_IP" ] || return 0
  if ! host_has_ip "$TARGET_HOST_IP"; then
    blocker "target_host_ip_not_present_${TARGET_HOST_IP}"
  fi
}
# Validate the controlled lane config file and set CONFIG_READY=1 when it is acceptable.
#
# Registers blockers when: the config is unreadable; the runner capacity is
# missing/non-numeric or above MAX_CAPACITY; no labels are found; a label name
# matches FORBIDDEN_LABEL_RE; a label name is anything other than awoooi-host /
# awoooi-ubuntu; or either of the two required labels is absent.
# Prints one CD_LANE_CONFIG line and one CD_LANE_LABEL line per label.
check_config() {
  section "controlled lane config"
  local capacity labels label name has_host=0 has_ubuntu=0 forbidden=0
  if [ ! -r "$CD_LANE_DRAIN_CONFIG" ]; then
    printf 'CD_LANE_CONFIG path=%s readable=0\n' "$CD_LANE_DRAIN_CONFIG"
    blocker "controlled_cd_lane_config_missing"
    return 0
  fi
  capacity="$(extract_runner_capacity "$CD_LANE_DRAIN_CONFIG" | head -1)"
  printf 'CD_LANE_CONFIG path=%s readable=1 capacity=%s max_capacity=%s\n' "$CD_LANE_DRAIN_CONFIG" "${capacity:-missing}" "$MAX_CAPACITY"
  # Capacity must be a plain non-negative integer before it is compared numerically.
  if ! printf '%s' "${capacity:-}" | grep -Eq '^[0-9]+$'; then
    blocker "controlled_cd_lane_capacity_missing"
  elif [ "$capacity" -gt "$MAX_CAPACITY" ]; then
    blocker "controlled_cd_lane_capacity_too_high:${capacity}"
  fi
  labels="$(extract_runner_labels "$CD_LANE_DRAIN_CONFIG" || true)"
  if [ -z "$labels" ]; then
    blocker "controlled_cd_lane_labels_missing"
  fi
  while IFS= read -r label; do
    [ -n "$label" ] || continue
    name="$(label_name "$label")"
    printf 'CD_LANE_LABEL label=%s name=%s\n' "$label" "$name"
    # The host label must match exactly, including its ":host" suffix.
    if [ "$label" = "awoooi-host:host" ]; then
      has_host=1
    fi
    # The ubuntu label must match this exact image reference.
    case "$label" in
      awoooi-ubuntu:docker://192.168.0.110:5000/awoooi/ci-runner:act-22.04)
        has_ubuntu=1
        ;;
    esac
    if printf '%s' "$name" | grep -Eq "$FORBIDDEN_LABEL_RE"; then
      forbidden=1
      blocker "controlled_cd_lane_forbidden_label:${name}"
    fi
    # Allow-list check: a forbidden name also trips this, so it yields two blockers.
    if [ "$name" != "awoooi-host" ] && [ "$name" != "awoooi-ubuntu" ]; then
      forbidden=1
      blocker "controlled_cd_lane_unexpected_label:${name}"
    fi
  done <<<"$labels"
  [ "$has_host" -eq 1 ] || blocker "controlled_cd_lane_awoooi_host_label_missing"
  [ "$has_ubuntu" -eq 1 ] || blocker "controlled_cd_lane_awoooi_ubuntu_label_missing"
  # Ready only when both required labels are present, nothing was rejected, and capacity is within bounds.
  if [ "$has_host" -eq 1 ] && [ "$has_ubuntu" -eq 1 ] && [ "$forbidden" -eq 0 ] \
    && printf '%s' "${capacity:-}" | grep -Eq '^[0-9]+$' && [ "$capacity" -le "$MAX_CAPACITY" ]; then
    CONFIG_READY=1
  fi
}
# Verify the controlled lane binary is an executable regular file that `file`
# identifies as ELF. Sets BINARY_READY=1 on success, otherwise registers a blocker.
check_binary() {
  local kind is_executable_file=0
  section "controlled lane binary"
  kind="$(file -b "$CD_LANE_DRAIN_BINARY" 2>/dev/null || echo missing)"
  if [ -x "$CD_LANE_DRAIN_BINARY" ] && [ -f "$CD_LANE_DRAIN_BINARY" ]; then
    is_executable_file=1
  fi
  printf 'CD_LANE_BINARY path=%s executable=%s kind=%s\n' \
    "$CD_LANE_DRAIN_BINARY" "$is_executable_file" "$kind"
  if [ "$is_executable_file" -eq 1 ] && grep -qi "ELF" <<<"$kind"; then
    BINARY_READY=1
    return 0
  fi
  blocker "controlled_cd_lane_binary_not_ready"
}
# Check that at least one candidate registration file exists as a non-empty
# regular file. Only size and permission bits are reported; the file content is
# never opened. Sets REGISTRATION_READY=1 on success, otherwise registers a blocker.
check_registration() {
  local candidate perms bytes any_present=0
  section "controlled lane registration metadata"
  # Intentionally unquoted: the variable is a space-separated list of paths.
  for candidate in $CD_LANE_DRAIN_REGISTRATION_PATHS; do
    if ! { [ -f "$candidate" ] && [ -s "$candidate" ]; }; then
      printf 'CD_LANE_REGISTRATION path=%s present=0 content_read=false\n' "$candidate"
      continue
    fi
    # Try the GNU stat syntax first, then the BSD one.
    perms="$(stat -c '%a' "$candidate" 2>/dev/null || stat -f '%Lp' "$candidate" 2>/dev/null || echo unknown)"
    bytes="$(stat -c '%s' "$candidate" 2>/dev/null || stat -f '%z' "$candidate" 2>/dev/null || echo unknown)"
    printf 'CD_LANE_REGISTRATION path=%s present=1 size_bytes=%s mode=%s content_read=false\n' "$candidate" "$bytes" "$perms"
    any_present=1
  done
  if [ "$any_present" -ne 1 ]; then
    blocker "controlled_cd_lane_registration_missing"
    return 0
  fi
  REGISTRATION_READY=1
}
# Succeed when the unit text contains every required resource/hardening directive:
# CPU, memory and task accounting with limits, NoNewPrivileges=true and a Restart= policy.
unit_has_required_limits() {
  local text="$1"
  local directive
  for directive in \
    'CPUAccounting=true' \
    'CPUQuota=' \
    'MemoryAccounting=true' \
    'Memory(Max|High)=' \
    'TasksAccounting=true' \
    'TasksMax=' \
    'NoNewPrivileges=true' \
    'Restart='; do
    # Each directive must start a line (leading whitespace allowed).
    grep -Eq "^[[:space:]]*${directive}" <<<"$text" || return 1
  done
  return 0
}
# Succeed when the unit text mentions both the controlled lane binary path and
# its config path (fixed-string match anywhere in the text).
unit_has_target_match() {
  local text="$1"
  if grep -Fq -- "$CD_LANE_DRAIN_BINARY" <<<"$text" \
    && grep -Fq -- "$CD_LANE_DRAIN_CONFIG" <<<"$text"; then
    return 0
  fi
  return 1
}
# Succeed when the unit text contains a `ConditionPathExists=<path>` line for
# one of the candidate registration paths.
#
# Lines are trimmed and compared as exact strings rather than interpolating the
# path into a regular expression: inside a regex the '.' of ".runner" would
# match any character, and escaping '/' as '\/' is undefined in POSIX ERE.
# Negated ("=!path") or commented-out conditions therefore do not count.
#
# Arguments: $1 - unit file text (e.g. `systemctl cat` output).
unit_has_registration_condition() {
  local text="$1"
  local registration line
  while IFS= read -r line; do
    # Strip leading, then trailing whitespace.
    line="${line#"${line%%[![:space:]]*}"}"
    line="${line%"${line##*[![:space:]]}"}"
    case "$line" in
      ConditionPathExists=*) ;;
      *) continue ;;
    esac
    # Intentionally unquoted: the variable is a space-separated list of paths.
    for registration in $CD_LANE_DRAIN_REGISTRATION_PATHS; do
      if [ "$line" = "ConditionPathExists=${registration}" ]; then
        return 0
      fi
    done
  done <<<"$text"
  return 1
}
# Inspect the controlled lane systemd unit and set SERVICE_READY=1 when it has
# the required limits, points at the expected binary/config, carries the
# registration ConditionPathExists, and (when REQUIRE_ACTIVE_SERVICE=1) is
# active with a non-zero MainPID. Each missing property registers its own blocker.
check_service() {
  local unit_text state active mainpid
  local limits_ok=0 target_ok=0 condition_ok=0 running=0
  section "controlled lane service"
  if ! unit_text="$(systemd_cat "$CD_LANE_DRAIN_SERVICE" 2>/dev/null)"; then
    printf 'CD_LANE_SERVICE unit=%s installed=0\n' "$CD_LANE_DRAIN_SERVICE"
    blocker "controlled_cd_lane_service_missing"
    return 0
  fi
  # Flatten the KEY=VALUE lines so they fit on the single readback line below.
  state="$(systemd_show "$CD_LANE_DRAIN_SERVICE" | tr '\n' ' ')"
  active="$(printf '%s\n' "$state" | tr ' ' '\n' | awk -F= '$1 == "ActiveState" {print $2; exit}')"
  mainpid="$(printf '%s\n' "$state" | tr ' ' '\n' | awk -F= '$1 == "MainPID" {print $2; exit}')"
  printf 'CD_LANE_SERVICE unit=%s installed=1 %s\n' "$CD_LANE_DRAIN_SERVICE" "$state"

  if unit_has_required_limits "$unit_text"; then
    limits_ok=1
  else
    blocker "controlled_cd_lane_service_limits_missing"
  fi
  if unit_has_target_match "$unit_text"; then
    target_ok=1
  else
    blocker "controlled_cd_lane_service_target_mismatch"
  fi
  if unit_has_registration_condition "$unit_text"; then
    condition_ok=1
  else
    blocker "controlled_cd_lane_service_registration_condition_missing"
  fi
  printf 'CD_LANE_SERVICE_GUARDRAILS unit=%s active=%s main_pid=%s limits=%s target_match=%s registration_condition=%s\n' \
    "$CD_LANE_DRAIN_SERVICE" "${active:-unknown}" "${mainpid:-0}" "$limits_ok" "$target_ok" "$condition_ok"

  # "Running" means ActiveState=active together with a positive MainPID.
  if [ "${active:-}" = "active" ] && [[ "${mainpid:-0}" =~ ^[1-9][0-9]*$ ]]; then
    running=1
  fi
  if [ "$REQUIRE_ACTIVE_SERVICE" = "1" ] && [ "$running" -eq 0 ]; then
    blocker "controlled_cd_lane_service_not_active"
  fi
  if [ "$limits_ok" -eq 1 ] && [ "$target_ok" -eq 1 ] && [ "$condition_ok" -eq 1 ]; then
    if [ "$REQUIRE_ACTIVE_SERVICE" != "1" ] || [ "$running" -eq 1 ]; then
      SERVICE_READY=1
    fi
  fi
}
# Confirm that the primary lane unit, every legacy runner unit and every legacy
# runner binary path is fail-closed. A legacy binary counts as restored when
# `file` reports it as ELF. Sets PRIMARY_LANE_FAILCLOSED and LEGACY_FAILCLOSED;
# each violation registers its own blocker.
check_failclosed_boundaries() {
  local legacy_unit legacy_path kind violations=0
  section "legacy fail-closed boundary"
  if [ "$REQUIRE_PRIMARY_LANE_FAILCLOSED" != "1" ]; then
    # Requirement relaxed: treat the primary lane as satisfied without probing it.
    PRIMARY_LANE_FAILCLOSED=1
  elif unit_failclosed_or_absent "$PRIMARY_CD_LANE_SERVICE"; then
    PRIMARY_LANE_FAILCLOSED=1
  else
    violations=$((violations + 1))
    blocker "primary_cd_lane_not_failclosed:${PRIMARY_CD_LANE_SERVICE}"
  fi
  # Both lists are space-separated, hence the unquoted expansions.
  for legacy_unit in $LEGACY_RUNNER_SERVICE_NAMES; do
    unit_failclosed_or_absent "$legacy_unit" && continue
    violations=$((violations + 1))
    blocker "legacy_runner_unit_not_failclosed:${legacy_unit}"
  done
  for legacy_path in $LEGACY_RUNNER_BINARY_PATHS; do
    kind="$(file -b "$legacy_path" 2>/dev/null || echo missing)"
    printf 'FAILCLOSED_BINARY path=%s kind=%s\n' "$legacy_path" "$kind"
    grep -qi "ELF" <<<"$kind" || continue
    violations=$((violations + 1))
    blocker "legacy_runner_binary_restored:${legacy_path}"
  done
  if [ "$violations" -eq 0 ]; then
    LEGACY_FAILCLOSED=1
  fi
}
# Read back current host pressure (action containers, heavy processes, load per
# core) and register a blocker for each reading above its configured maximum.
check_pressure() {
  local container_total heavy_total
  section "pressure readback"
  container_total="$(active_action_container_count)"
  heavy_total="$(heavy_process_count)"
  printf 'ACTIVE_ACTION_CONTAINERS=%s max=%s\n' "$container_total" "$MAX_ACTIVE_ACTION_CONTAINERS"
  printf 'HEAVY_PROCESS_COUNT=%s max=%s\n' "$heavy_total" "$MAX_HEAVY_PROCESS_COUNT"
  if ! [ "$container_total" -le "$MAX_ACTIVE_ACTION_CONTAINERS" ]; then
    blocker "active_action_containers_present:${container_total}"
  fi
  if ! [ "$heavy_total" -le "$MAX_HEAVY_PROCESS_COUNT" ]; then
    blocker "heavy_processes_present:${heavy_total}"
  fi
  if ! load_per_core_ok; then
    blocker "host_load_per_core_too_high"
  fi
}
# Count leftover "awoooi-cd-lane-disabled-*" / "awoooi-cd-lane-drain-disabled-*"
# directories directly under ROOT_RESTORE_PARENT, store the count in
# ROOT_RESTORE_LEFT and register a blocker when any are left.
#
# A failing `find` (for example when the parent directory cannot be listed by
# the current user) is handled explicitly: under `set -euo pipefail` an
# unguarded failing pipeline inside the assignment would terminate the whole
# verifier before any verdict is printed. The scan failure is reported as its
# own blocker instead, so the check still fails closed.
check_restore_sources() {
  local listing scan_ok=1
  section "restore-source readback"
  ROOT_RESTORE_LEFT=0
  if [ -d "$ROOT_RESTORE_PARENT" ]; then
    listing="$(find "$ROOT_RESTORE_PARENT" -maxdepth 1 -type d \( -name 'awoooi-cd-lane-disabled-*' -o -name 'awoooi-cd-lane-drain-disabled-*' \) -print 2>/dev/null)" || scan_ok=0
    if [ -n "$listing" ]; then
      # One path per line; the here-string supplies the final newline for wc.
      ROOT_RESTORE_LEFT="$(wc -l <<<"$listing" | tr -d ' ')"
    fi
  fi
  printf 'CD_LANE_ROOT_RESTORE_SOURCES parent=%s left=%s\n' "$ROOT_RESTORE_PARENT" "$ROOT_RESTORE_LEFT"
  [ "$scan_ok" -eq 1 ] || blocker "cd_lane_root_restore_sources_unreadable"
  [ "$ROOT_RESTORE_LEFT" = "0" ] || blocker "cd_lane_root_restore_sources_left:${ROOT_RESTORE_LEFT}"
}
# Print the readiness summary and the recommended next step.
# Returns 0 when no blocker was recorded, 1 otherwise.
print_verdict() {
  local next_step
  section "verdict"
  # printf re-applies the format to each NAME/VALUE pair in turn.
  printf '%s=%s\n' \
    CONFIG_READY "$CONFIG_READY" \
    BINARY_READY "$BINARY_READY" \
    REGISTRATION_READY "$REGISTRATION_READY" \
    SERVICE_READY "$SERVICE_READY" \
    LEGACY_FAILCLOSED "$LEGACY_FAILCLOSED" \
    PRIMARY_LANE_FAILCLOSED "$PRIMARY_LANE_FAILCLOSED" \
    WARNING_COUNT "${#WARNINGS[@]}" \
    BLOCKER_COUNT "${#BLOCKERS[@]}"
  if [ "${#BLOCKERS[@]}" -eq 0 ]; then
    printf 'AWOOOI_110_CONTROLLED_CD_LANE_READY=1\n'
    printf 'safe_next_step=rerun_harbor_110_local_repair_queue_readback_and_non110_cd_closure_verifier\n'
    return 0
  fi
  # Pick the most specific remediation; the first matching condition wins.
  if [ "$REGISTRATION_READY" -eq 0 ]; then
    next_step="restore_or_register_awoooi_cd_lane_drain_registration_without_printing_token_then_rerun_this_verifier"
  elif [ "$ROOT_RESTORE_LEFT" != "0" ]; then
    next_step="quarantine_cd_lane_root_restore_sources_then_rerun_this_verifier"
  elif [ "$SERVICE_READY" -eq 0 ] && [ "$CONFIG_READY" -eq 1 ] && [ "$BINARY_READY" -eq 1 ]; then
    next_step="start_awoooi_cd_lane_drain_service_after_apply_window_then_rerun_this_verifier"
  else
    next_step="fix_controlled_cd_lane_guardrail_blockers_then_rerun_this_verifier"
  fi
  printf 'AWOOOI_110_CONTROLLED_CD_LANE_READY=0\n'
  printf 'safe_next_step=%s\n' "$next_step"
  return 1
}
# Entry point: print the audit header, run every check in order, then print
# the verdict. The return status is that of print_verdict.
main() {
  local check
  section "audit metadata"
  printf 'read_only=true\n'
  printf 'secret_values_collected=false\n'
  printf 'runner_token_read=false\n'
  printf 'raw_runner_registration_read=false\n'
  printf 'timestamp=%s\n' "$(date -Is 2>/dev/null || date)"
  printf 'host=%s\n' "$(hostname 2>/dev/null || echo unknown)"
  printf 'user=%s\n' "$(id -un 2>/dev/null || echo unknown)"
  for check in \
    check_host_selector \
    check_config \
    check_binary \
    check_registration \
    check_service \
    check_failclosed_boundaries \
    check_restore_sources \
    check_pressure; do
    "$check"
  done
  print_verdict
}
main "$@"

View File

@@ -0,0 +1,267 @@
#!/usr/bin/env python3
from __future__ import annotations
import os
import subprocess
from pathlib import Path
ROOT = Path(__file__).resolve().parents[2]
VERIFIER = ROOT / "ops/runner/check-awoooi-110-controlled-cd-lane-readiness.sh"
def _write_fake_bin(path: Path, name: str, body: str) -> None:
target = path / name
target.write_text(body, encoding="utf-8")
target.chmod(0o755)
def _write_config(path: Path, *, forbidden_label: bool = False) -> None:
path.parent.mkdir(parents=True, exist_ok=True)
labels = [
' - "awoooi-host:host"',
' - "awoooi-ubuntu:docker://192.168.0.110:5000/awoooi/ci-runner:act-22.04"',
]
if forbidden_label:
labels.append(' - "ubuntu-latest:docker://node:22"')
path.write_text(
"\n".join(
[
"runner:",
" capacity: 1",
" labels:",
*labels,
"",
]
),
encoding="utf-8",
)
def _write_unit(
    path: Path,
    *,
    binary: Path,
    config: Path,
    registration: Path,
    include_registration_condition: bool = True,
) -> None:
    """Write a controlled-drain systemd unit file for the tests.

    The unit runs ``binary`` with ``daemon --config <config>`` and carries the
    CPU / memory / tasks limits, ``NoNewPrivileges=true`` and ``Restart=always``.
    ``ConditionPathExists=<registration>`` is included only when
    ``include_registration_condition`` is true.
    """
    # Empty string drops the condition line entirely (leaving a blank line in [Unit]).
    condition = f"ConditionPathExists={registration}\n" if include_registration_condition else ""
    path.write_text(
        f"""
[Unit]
Description=AWOOOI controlled CD lane drain
After=network-online.target docker.service
Wants=network-online.target
Requires=docker.service
{condition}
[Service]
Type=simple
User=wooo
WorkingDirectory={binary.parent}/data
Environment=HOME=/home/wooo
Environment=AWOOOI_CONTROLLED_RUNNER_OPEN=1
ExecStart={binary} daemon --config {config}
Restart=always
RestartSec=10
KillSignal=SIGINT
TimeoutStopSec=3700
SuccessExitStatus=0 130 143
CPUAccounting=true
CPUQuota=250%
MemoryAccounting=true
MemoryHigh=8G
MemoryMax=12G
TasksAccounting=true
TasksMax=512
NoNewPrivileges=true
""".strip()
        + "\n",
        encoding="utf-8",
    )
def _run_verifier(
    tmp_path: Path,
    *,
    registration_present: bool = True,
    forbidden_label: bool = False,
    active_service: bool = True,
    include_registration_condition: bool = True,
    legacy_active: bool = False,
) -> subprocess.CompletedProcess[str]:
    """Run the readiness verifier against a fully faked host under ``tmp_path``.

    A lane directory (binary, config, optional registration file) and a unit
    file are created, and stub ``systemctl`` / ``ip`` / ``docker`` / ``pgrep`` /
    ``file`` commands are placed first on ``PATH``.

    The environment is made hermetic: every verifier knob that the caller's
    shell could have exported is dropped, the host selector is pinned to the
    address the stub ``ip`` reports, and the load-per-core ceiling is raised so
    the real ``/proc/loadavg`` of the machine running the tests cannot turn a
    "ready" scenario into a blocker.

    Args:
        tmp_path: Empty scratch directory (pytest ``tmp_path``).
        registration_present: Create a non-empty ``data/.runner`` file.
        forbidden_label: Add an ``ubuntu-latest`` label to the config.
        active_service: Stub reports the drain unit as active with a MainPID.
        include_registration_condition: Put ``ConditionPathExists`` in the unit.
        legacy_active: Stub reports legacy runner units as loaded and active.

    Returns:
        The completed verifier process with captured text stdout / stderr.
    """
    fake_bin = tmp_path / "bin"
    unit_dir = tmp_path / "units"
    root_restore = tmp_path / "root"
    lane_dir = tmp_path / "awoooi-cd-lane-drain"
    data_dir = lane_dir / "data"
    fake_bin.mkdir()
    unit_dir.mkdir()
    root_restore.mkdir()
    data_dir.mkdir(parents=True)

    binary = lane_dir / "awoooi_cd_lane_controlled"
    config = lane_dir / "config.yaml"
    registration = data_dir / ".runner"
    binary.write_bytes(b"\x7fELF controlled test binary\n")
    binary.chmod(0o755)
    _write_config(config, forbidden_label=forbidden_label)
    if registration_present:
        # Sentinel content: tests assert it never appears in the verifier output.
        registration.write_text("secret-token-like-content-not-printed\n", encoding="utf-8")
    _write_unit(
        unit_dir / "awoooi-cd-lane-drain.service",
        binary=binary,
        config=config,
        registration=registration,
        include_registration_condition=include_registration_condition,
    )

    drain_active_state = "active" if active_service else "inactive"
    drain_main_pid = "1234" if active_service else "0"
    legacy_load_state = "loaded" if legacy_active else "masked"
    legacy_state = "active" if legacy_active else "inactive"
    legacy_unit_file_state = "enabled" if legacy_active else "masked"
    legacy_pid = "4321" if legacy_active else "0"

    # Stub systemctl: units present in unit_dir are "loaded"; the primary lane
    # is always masked; legacy units follow ``legacy_active``; anything else is
    # reported as not-found.
    _write_fake_bin(
        fake_bin,
        "systemctl",
        f"""#!/usr/bin/env bash
set -euo pipefail
cmd="${{1:-}}"; unit="${{2:-}}"
case "$cmd" in
show)
unit="${{2:-}}"
if [ -f "{unit_dir}/$unit" ]; then
printf 'LoadState=loaded\\nActiveState={drain_active_state}\\nUnitFileState=enabled\\nMainPID={drain_main_pid}\\n'
exit 0
fi
case "$unit" in
awoooi-cd-lane.service)
printf 'LoadState=masked\\nActiveState=inactive\\nUnitFileState=masked\\nMainPID=0\\n'
exit 0
;;
awoooi-direct-runner-open.service|awoooi-direct-runner.service|gitea-act-runner-host.service|gitea-act-runner-awoooi-controlled.service|gitea-awoooi-controlled-runner.service|gitea-act-runner-awoooi-open.service)
printf 'LoadState=%s\\nActiveState=%s\\nUnitFileState=%s\\nMainPID=%s\\n' '{legacy_load_state}' "{legacy_state}" '{legacy_unit_file_state}' "{legacy_pid}"
exit 0
;;
esac
printf 'LoadState=not-found\\nActiveState=inactive\\nUnitFileState=\\nMainPID=0\\n'
exit 0
;;
cat)
if [ -f "{unit_dir}/$unit" ]; then cat "{unit_dir}/$unit"; exit 0; fi
exit 1
;;
esac
exit 1
""",
    )
    # Stub ip: the host owns exactly 192.168.0.110.
    _write_fake_bin(
        fake_bin,
        "ip",
        """#!/usr/bin/env bash
if [ "${1:-}" = "-o" ] && [ "${2:-}" = "-4" ] && [ "${3:-}" = "addr" ]; then
printf '2: eth0 inet 192.168.0.110/24 brd 192.168.0.255 scope global eth0\\n'
exit 0
fi
exit 1
""",
    )
    # Stub docker: no containers are running.
    _write_fake_bin(
        fake_bin,
        "docker",
        """#!/usr/bin/env bash
if [ "${1:-}" = "ps" ]; then exit 0; fi
exit 0
""",
    )
    # Stub pgrep: no heavy processes are found.
    _write_fake_bin(
        fake_bin,
        "pgrep",
        """#!/usr/bin/env bash
exit 1
""",
    )
    # Stub file: only the controlled lane binary is reported as ELF.
    _write_fake_bin(
        fake_bin,
        "file",
        f"""#!/usr/bin/env bash
case "${{*:-}}" in
*"{binary}") printf 'ELF 64-bit LSB executable\\n'; exit 0 ;;
esac
printf 'POSIX shell script\\n'
""",
    )

    # Verifier knobs that must not leak in from the caller's environment; the
    # script then falls back to its own defaults unless pinned below.
    inherited_knobs = (
        "TARGET_HOST_IP",
        "CD_LANE_DRAIN_SERVICE",
        "PRIMARY_CD_LANE_SERVICE",
        "MAX_CAPACITY",
        "MAX_LOAD_PER_CORE",
        "REQUIRE_ACTIVE_SERVICE",
        "REQUIRE_PRIMARY_LANE_FAILCLOSED",
        "LEGACY_RUNNER_SERVICE_NAMES",
        "FORBIDDEN_LABEL_RE",
    )
    base_env = {key: value for key, value in os.environ.items() if key not in inherited_knobs}
    env = {
        **base_env,
        "PATH": f"{fake_bin}:{os.environ['PATH']}",
        "TARGET_HOST_IP": "192.168.0.110",
        "CD_LANE_DRAIN_DIR": str(lane_dir),
        "CD_LANE_DRAIN_BINARY": str(binary),
        "CD_LANE_DRAIN_CONFIG": str(config),
        "CD_LANE_DRAIN_REGISTRATION_PATHS": str(registration),
        "ROOT_RESTORE_PARENT": str(root_restore),
        "LEGACY_RUNNER_BINARY_PATHS": str(tmp_path / "legacy_act_runner"),
        "MAX_HEAVY_PROCESS_COUNT": "0",
        "MAX_ACTIVE_ACTION_CONTAINERS": "0",
        # The load check reads the real /proc/loadavg; keep it from failing on a busy host.
        "MAX_LOAD_PER_CORE": "1000000",
    }
    return subprocess.run(
        ["bash", str(VERIFIER)],
        check=False,
        env=env,
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
def test_110_controlled_cd_lane_ready_without_printing_registration_content(
    tmp_path: Path,
) -> None:
    """A fully prepared lane is reported ready and the registration content stays out of the output."""
    outcome = _run_verifier(tmp_path)
    report = outcome.stdout

    assert outcome.returncode == 0, report + outcome.stderr
    expected_markers = (
        "AWOOOI_110_CONTROLLED_CD_LANE_READY=1",
        "runner_token_read=false",
        "raw_runner_registration_read=false",
        "content_read=false",
        "CD_LANE_SERVICE_GUARDRAILS",
        "registration_condition=1",
        "safe_next_step=rerun_harbor_110_local_repair_queue_readback_and_non110_cd_closure_verifier",
    )
    for marker in expected_markers:
        assert marker in report
    # The sentinel written into the fake registration file must never be echoed.
    assert "secret-token-like-content" not in report
def test_110_controlled_cd_lane_blocks_missing_registration(tmp_path: Path) -> None:
    """Without a registration file the verifier fails and recommends restoring or registering first."""
    outcome = _run_verifier(tmp_path, registration_present=False)
    report = outcome.stdout

    assert outcome.returncode == 1
    for marker in (
        "BLOCKER controlled_cd_lane_registration_missing",
        "AWOOOI_110_CONTROLLED_CD_LANE_READY=0",
        "safe_next_step=restore_or_register_awoooi_cd_lane_drain_registration_without_printing_token_then_rerun_this_verifier",
    ):
        assert marker in report
def test_110_controlled_cd_lane_blocks_forbidden_generic_label(tmp_path: Path) -> None:
    """An ``ubuntu-latest`` label is rejected both as forbidden and as unexpected."""
    outcome = _run_verifier(tmp_path, forbidden_label=True)
    report = outcome.stdout

    assert outcome.returncode == 1
    for marker in (
        "BLOCKER controlled_cd_lane_forbidden_label:ubuntu-latest",
        "BLOCKER controlled_cd_lane_unexpected_label:ubuntu-latest",
        "AWOOOI_110_CONTROLLED_CD_LANE_READY=0",
    ):
        assert marker in report
def test_110_controlled_cd_lane_requires_registration_condition(tmp_path: Path) -> None:
    """A unit without the registration ``ConditionPathExists`` line is not considered ready."""
    outcome = _run_verifier(tmp_path, include_registration_condition=False)
    report = outcome.stdout

    assert outcome.returncode == 1
    for marker in (
        "BLOCKER controlled_cd_lane_service_registration_condition_missing",
        "AWOOOI_110_CONTROLLED_CD_LANE_READY=0",
    ):
        assert marker in report
def test_110_controlled_cd_lane_blocks_active_legacy_runner(tmp_path: Path) -> None:
    """An active legacy runner unit keeps the lane from being reported ready."""
    outcome = _run_verifier(tmp_path, legacy_active=True)
    report = outcome.stdout

    assert outcome.returncode == 1
    for marker in (
        "BLOCKER legacy_runner_unit_not_failclosed:gitea-act-runner-host.service",
        "AWOOOI_110_CONTROLLED_CD_LANE_READY=0",
    ):
        assert marker in report

View File

@@ -204,7 +204,13 @@ def test_closure_verifier_prioritizes_harbor_110_runner_label_blocker() -> None:
== "awoooi-host"
)
assert payload["progress"]["next_blocked_step_id"] == "public_queue_runner_match"
assert "check_awoooi_110_controlled_cd_lane_readiness" in payload[
"next_actions"
][0]
assert "awoooi_host_runner_control_path" in payload["next_actions"][0]
assert "check_awoooi_110_controlled_cd_lane_readiness" in payload["progress"][
"next_blocked_step_action"
]
assert "awoooi_host_runner_control_path" in payload["progress"][
"next_blocked_step_action"
]

View File

@@ -316,7 +316,8 @@ def build_closure_verifier(
or bool(harbor_110_repair_no_matching_runner_label)
)
queue_runner_match_next_action = (
"restore_awoooi_host_runner_control_path_without_legacy_or_generic_labels_"
"run_ops_runner_check_awoooi_110_controlled_cd_lane_readiness_on_110_"
"then_restore_awoooi_host_runner_control_path_without_legacy_or_generic_labels_"
"then_rerun_harbor_110_repair_queue_readback"
if harbor_110_repair_no_matching_runner
else "rerun_public_queue_readback_until_no_matching_runner_is_absent"

View File

@@ -442,6 +442,7 @@ Description=AWOOOI controlled CD lane drain bypass for old queued guards
After=network-online.target docker.service
Wants=network-online.target
Requires=docker.service
ConditionPathExists=${CD_LANE_DRAIN_DIR}/data/.runner
[Service]
Type=simple

View File

@@ -84,6 +84,7 @@ def test_startup_110_opens_only_controlled_cd_lane_after_guardrails() -> None:
assert 'CD_LANE_ROOT_RESTORE_LEFT="$(cd_lane_root_restore_sources_left)"' in text
assert 'START_CD_LANE_ALLOWED=1' in text
assert 'install_controlled_cd_lane_drain_unit' in text
assert 'ConditionPathExists=${CD_LANE_DRAIN_DIR}/data/.runner' in text
assert 'systemctl unmask "$CD_LANE_DRAIN_SERVICE"' in text
assert 'systemctl enable --now "$CD_LANE_DRAIN_SERVICE"' in text
assert 'ensure_controlled_cd_lane_open' in text