From f426610d83e47dba050bd36d48d26ae5ce8ba9f8 Mon Sep 17 00:00:00 2001 From: ogt Date: Thu, 2 Jul 2026 11:01:49 +0800 Subject: [PATCH] fix(runner): add token-safe 110 drain registration guard --- .gitea/workflows/cd.yaml | 6 + docs/LOGBOOK.md | 26 +++ ops/runner/README.md | 27 +++ ...awoooi-110-controlled-cd-lane-readiness.sh | 70 +++++- ...ter-awoooi-110-controlled-cd-lane-drain.sh | 215 ++++++++++++++++++ .../test_cd_controlled_runtime_profile.py | 8 + ...awoooi_110_controlled_cd_lane_readiness.py | 38 ++++ ...ter_awoooi_110_controlled_cd_lane_drain.py | 139 +++++++++++ 8 files changed, 519 insertions(+), 10 deletions(-) create mode 100755 ops/runner/register-awoooi-110-controlled-cd-lane-drain.sh create mode 100644 ops/runner/test_register_awoooi_110_controlled_cd_lane_drain.py diff --git a/.gitea/workflows/cd.yaml b/.gitea/workflows/cd.yaml index 383c015f..621f16cd 100644 --- a/.gitea/workflows/cd.yaml +++ b/.gitea/workflows/cd.yaml @@ -516,6 +516,8 @@ jobs: ;; ops/runner/install-awoooi-non110-runner-user-service.sh) ;; + ops/runner/register-awoooi-110-controlled-cd-lane-drain.sh) + ;; ops/runner/test_read_public_gitea_actions_queue.py) ;; ops/runner/test_cd_controlled_runtime_profile.py) @@ -524,6 +526,8 @@ jobs: ;; ops/runner/test_install_awoooi_non110_runner_user_service.py) ;; + ops/runner/test_register_awoooi_110_controlled_cd_lane_drain.py) + ;; ops/runner/test_check_awoooi_110_controlled_cd_lane_readiness.py) ;; ops/runner/test_verify_awoooi_non110_cd_closure.py) @@ -827,6 +831,7 @@ jobs: ../../ops/runner/check-awoooi-110-controlled-cd-lane-readiness.sh \ ../../ops/runner/check-awoooi-non110-runner-readiness.sh \ ../../ops/runner/install-awoooi-non110-runner-user-service.sh \ + ../../ops/runner/register-awoooi-110-controlled-cd-lane-drain.sh \ ../../scripts/reboot-recovery/deploy-to-110.sh \ ../../scripts/reboot-recovery/deploy-to-188.sh \ ../../scripts/reboot-recovery/enforce-110-runner-failclosed.sh \ @@ -895,6 +900,7 @@ jobs: 
../../ops/runner/test_cd_controlled_runtime_profile.py \ ../../ops/runner/test_check_awoooi_non110_runner_readiness.py \ ../../ops/runner/test_install_awoooi_non110_runner_user_service.py \ + ../../ops/runner/test_register_awoooi_110_controlled_cd_lane_drain.py \ ../../ops/runner/test_check_awoooi_110_controlled_cd_lane_readiness.py \ ../../ops/runner/test_verify_awoooi_non110_cd_closure.py \ ../../scripts/backup/tests/test_backup_status_contract.py \ diff --git a/docs/LOGBOOK.md b/docs/LOGBOOK.md index c5eec4fe..19bd7a35 100644 --- a/docs/LOGBOOK.md +++ b/docs/LOGBOOK.md @@ -52520,6 +52520,32 @@ production browser smoke: **下一步**: - commit / push workflow classifier 修法,讀回新的 Gitea CD,確認 tests 走 controlled-runtime 並跳過 B5;runner registration 仍需 token-safe path 補齊後才可啟動 `awoooi-cd-lane-drain.service`。 +## 2026-07-02 — P0 110 controlled drain token-safe registration helper + +**完成內容**: +- 新增 `ops/runner/register-awoooi-110-controlled-cd-lane-drain.sh`,把 live readback 剩餘的 `controlled_cd_lane_registration_missing` 從人工 token 混亂收斂成 token-safe helper。 +- helper 只允許 interactive TTY hidden prompt,不接受 `RUNNER_TOKEN` environment,不使用 `--token` argv,不列印 token,不讀 `.runner` 內容;registration 目標固定在 `/home/wooo/awoooi-cd-lane-drain/data/.runner`,符合 `awoooi-cd-lane-drain.service` 的 `ConditionPathExists`。 +- helper 支援 `--check` dry-run;若 registration 已存在,只輸出 metadata 與 `runner_registration_content_read=false`。`--enable-service` 必須顯式指定,且會先以 `REQUIRE_ACTIVE_SERVICE=0` 跑 110 readiness verifier,只有非 secret guardrails 通過後才啟動 `awoooi-cd-lane-drain.service`。 +- `.gitea/workflows/cd.yaml` 已把新 helper 與測試納入 controlled-runtime path allowlist、`bash -n` syntax check 與 focused pytest 清單,避免同類 runner registration source patch 再落入 B5 Docker socket。 +- live `--check` 發現 helper 以 service user `wooo` 身分無法讀 `/home/wooo/awoooi-cd-lane-drain/config.yaml`;因此同步補強 `check-awoooi-110-controlled-cd-lane-readiness.sh`,要求 service user 也能讀 config / registration、執行 ELF binary,避免 root 角度的 `CONFIG_READY=1` 造成 false-green。 +- 
110 交叉 readback 確認 live 又被回封成 config missing、binary shell stub、unit masked;verifier 新增 `SERVICE_GUARDRAILS_READY`,並把 safe next step 改為先恢復 controlled drain staging artifacts,再處理 registration,避免在 staging 不存在時誤導成只缺 token。 + +**本地驗證結果**: +- `bash -n ops/runner/register-awoooi-110-controlled-cd-lane-drain.sh ops/runner/check-awoooi-110-controlled-cd-lane-readiness.sh .gitea/workflows/cd.yaml`:通過。 +- `python3.11 -m pytest ops/runner/test_register_awoooi_110_controlled_cd_lane_drain.py ops/runner/test_cd_controlled_runtime_profile.py ops/runner/test_check_awoooi_110_controlled_cd_lane_readiness.py -q`:`55 passed`。 +- `python3 ops/runner/guard-gitea-runner-pressure.py --root .`:通過,`auto_branch_events_on_110=0`、`generic_runner_labels=0`。 +- `node scripts/ci/check-gitea-step-env-secrets.js`:通過,`no Gitea run/with secrets or legacy Telegram routes`。 +- `git diff --check`:通過。 + +**110 read-only readback**: +- 新版 verifier 透過 SSH stdin / `sudo -n bash -s` 只讀執行,未部署、未寫 host;讀回 `CONFIG_READY=0`、`BINARY_READY=0`、`REGISTRATION_READY=0`、`SERVICE_READY=0`、`SERVICE_GUARDRAILS_READY=0`。 +- live blockers:`controlled_cd_lane_config_missing`、`controlled_cd_lane_binary_not_ready`、`controlled_cd_lane_registration_missing`、service limits / target / registration condition missing、`controlled_cd_lane_service_not_active`、`heavy_processes_present`。 +- safe next step 已修正為 `restore_awoooi_cd_lane_drain_staging_artifacts_then_rerun_this_verifier`。 + +**仍維持**: +- 本段未讀 secret / runner token / `.runner` 內容 / `.env` / raw sessions / SQLite / auth。 +- 本段未註冊 runner、未啟動 service、未做 host write、未重啟 Docker / Nginx / K3s / DB / firewall。 + ## 2026-07-01 — 08:50 P0 188 DB circuit breaker post-push readback **完成內容**: diff --git a/ops/runner/README.md b/ops/runner/README.md index a3350bdc..99fd4e49 100644 --- a/ops/runner/README.md +++ b/ops/runner/README.md @@ -85,6 +85,33 @@ sanitized JSON;不得收集 token、不得 workflow_dispatch、不得註冊或 不得操作 host / Docker / K8s。若 unauthenticated actions list API 回 `401 token is required`,這是 
readback evidence,不是要求 Codex 取得 token。 +### 110 controlled CD drain lane registration + +2026-07-02 起,110 專用 `awoooi-cd-lane-drain.service` 的 runner registration +必須走 `register-awoooi-110-controlled-cd-lane-drain.sh`。此 helper 與 188 +non-110 helper 採同一個安全契約:只允許 interactive TTY hidden prompt, +不接受 `RUNNER_TOKEN` environment,不使用 `--token` argv,不列印 token,不讀 +`.runner` 內容。 + +先在 110 上做非 secret dry-run: + +```bash +ops/runner/register-awoooi-110-controlled-cd-lane-drain.sh --check +``` + +正式註冊時必須先把 helper 放到 110,再用 interactive TTY 執行;不要把 script +透過 stdin pipe 進去跑正式註冊,因為 token entry 需要 TTY: + +```bash +ssh -tt wooo@192.168.0.110 'cd /home/wooo/awoooi-cd-lane-drain && /path/to/register-awoooi-110-controlled-cd-lane-drain.sh' +``` + +若要在註冊成功後同一輪受控啟動 service,必須明確加 `--enable-service`。helper +會先用 `REQUIRE_ACTIVE_SERVICE=0 check-awoooi-110-controlled-cd-lane-readiness.sh` +確認 config、ELF binary、registration metadata、systemd guardrails、 +legacy fail-closed、root restore-source 與 host pressure 都通過後,才會 +`systemctl enable --now awoooi-cd-lane-drain.service`。 + ### 監控 ```bash diff --git a/ops/runner/check-awoooi-110-controlled-cd-lane-readiness.sh b/ops/runner/check-awoooi-110-controlled-cd-lane-readiness.sh index d0ff5b23..3cf0c284 100755 --- a/ops/runner/check-awoooi-110-controlled-cd-lane-readiness.sh +++ b/ops/runner/check-awoooi-110-controlled-cd-lane-readiness.sh @@ -8,6 +8,7 @@ set -euo pipefail TARGET_HOST_IP="${TARGET_HOST_IP:-192.168.0.110}" CD_LANE_DRAIN_DIR="${CD_LANE_DRAIN_DIR:-/home/wooo/awoooi-cd-lane-drain}" CD_LANE_DRAIN_SERVICE="${CD_LANE_DRAIN_SERVICE:-awoooi-cd-lane-drain.service}" +CD_LANE_DRAIN_USER="${CD_LANE_DRAIN_USER:-wooo}" CD_LANE_DRAIN_BINARY="${CD_LANE_DRAIN_BINARY:-${CD_LANE_DRAIN_DIR}/awoooi_cd_lane_controlled}" CD_LANE_DRAIN_CONFIG="${CD_LANE_DRAIN_CONFIG:-${CD_LANE_DRAIN_DIR}/config.yaml}" CD_LANE_DRAIN_REGISTRATION_PATHS="${CD_LANE_DRAIN_REGISTRATION_PATHS:-${CD_LANE_DRAIN_DIR}/data/.runner ${CD_LANE_DRAIN_DIR}/.runner}" @@ -29,6 +30,7 @@ 
CONFIG_READY=0 BINARY_READY=0 REGISTRATION_READY=0 SERVICE_READY=0 +SERVICE_GUARDRAILS_READY=0 LEGACY_FAILCLOSED=0 PRIMARY_LANE_FAILCLOSED=0 ROOT_RESTORE_LEFT="unknown" @@ -51,6 +53,26 @@ command_exists() { command -v "$1" >/dev/null 2>&1 } +service_user_test() { + local flag="$1" + local path="$2" + if [ -z "$CD_LANE_DRAIN_USER" ] || [ "$CD_LANE_DRAIN_USER" = "$(id -un 2>/dev/null || true)" ]; then + test "$flag" "$path" + return $? + fi + if [ "$(id -u 2>/dev/null || echo 1)" = "0" ]; then + if command_exists sudo; then + sudo -n -u "$CD_LANE_DRAIN_USER" test "$flag" "$path" 2>/dev/null + return $? + fi + if command_exists runuser; then + runuser -u "$CD_LANE_DRAIN_USER" -- test "$flag" "$path" 2>/dev/null + return $? + fi + fi + test "$flag" "$path" +} + host_ips() { if command_exists ip; then ip -o -4 addr show 2>/dev/null | awk '{print $4}' | sed 's#/.*##' | sort -u @@ -192,14 +214,20 @@ check_host_selector() { check_config() { section "controlled lane config" - local capacity labels label name has_host=0 has_ubuntu=0 forbidden=0 + local capacity labels label name has_host=0 has_ubuntu=0 forbidden=0 service_user_readable=0 if [ ! -r "$CD_LANE_DRAIN_CONFIG" ]; then printf 'CD_LANE_CONFIG path=%s readable=0\n' "$CD_LANE_DRAIN_CONFIG" blocker "controlled_cd_lane_config_missing" return 0 fi + if service_user_test -r "$CD_LANE_DRAIN_CONFIG"; then + service_user_readable=1 + else + blocker "controlled_cd_lane_config_not_readable_by_service_user:${CD_LANE_DRAIN_USER}" + fi capacity="$(extract_runner_capacity "$CD_LANE_DRAIN_CONFIG" | head -1)" - printf 'CD_LANE_CONFIG path=%s readable=1 capacity=%s max_capacity=%s\n' "$CD_LANE_DRAIN_CONFIG" "${capacity:-missing}" "$MAX_CAPACITY" + printf 'CD_LANE_CONFIG path=%s readable=1 service_user=%s service_user_readable=%s capacity=%s max_capacity=%s\n' \ + "$CD_LANE_DRAIN_CONFIG" "$CD_LANE_DRAIN_USER" "$service_user_readable" "${capacity:-missing}" "$MAX_CAPACITY" if ! 
printf '%s' "${capacity:-}" | grep -Eq '^[0-9]+$'; then blocker "controlled_cd_lane_capacity_missing" elif [ "$capacity" -gt "$MAX_CAPACITY" ]; then @@ -235,6 +263,7 @@ check_config() { [ "$has_host" -eq 1 ] || blocker "controlled_cd_lane_awoooi_host_label_missing" [ "$has_ubuntu" -eq 1 ] || blocker "controlled_cd_lane_awoooi_ubuntu_label_missing" if [ "$has_host" -eq 1 ] && [ "$has_ubuntu" -eq 1 ] && [ "$forbidden" -eq 0 ] \ + && [ "$service_user_readable" -eq 1 ] \ && printf '%s' "${capacity:-}" | grep -Eq '^[0-9]+$' && [ "$capacity" -le "$MAX_CAPACITY" ]; then CONFIG_READY=1 fi @@ -242,11 +271,17 @@ check_config() { check_binary() { section "controlled lane binary" - local kind + local kind service_user_executable=0 kind="$(file -b "$CD_LANE_DRAIN_BINARY" 2>/dev/null || echo missing)" - printf 'CD_LANE_BINARY path=%s executable=%s kind=%s\n' \ - "$CD_LANE_DRAIN_BINARY" "$([ -x "$CD_LANE_DRAIN_BINARY" ] && [ -f "$CD_LANE_DRAIN_BINARY" ] && echo 1 || echo 0)" "$kind" - if [ -x "$CD_LANE_DRAIN_BINARY" ] && [ -f "$CD_LANE_DRAIN_BINARY" ] && grep -qi "ELF" <<<"$kind"; then + if service_user_test -x "$CD_LANE_DRAIN_BINARY"; then + service_user_executable=1 + else + blocker "controlled_cd_lane_binary_not_executable_by_service_user:${CD_LANE_DRAIN_USER}" + fi + printf 'CD_LANE_BINARY path=%s executable=%s service_user=%s service_user_executable=%s kind=%s\n' \ + "$CD_LANE_DRAIN_BINARY" "$([ -x "$CD_LANE_DRAIN_BINARY" ] && [ -f "$CD_LANE_DRAIN_BINARY" ] && echo 1 || echo 0)" \ + "$CD_LANE_DRAIN_USER" "$service_user_executable" "$kind" + if [ -x "$CD_LANE_DRAIN_BINARY" ] && [ -f "$CD_LANE_DRAIN_BINARY" ] && [ "$service_user_executable" -eq 1 ] && grep -qi "ELF" <<<"$kind"; then BINARY_READY=1 else blocker "controlled_cd_lane_binary_not_ready" @@ -255,13 +290,22 @@ check_binary() { check_registration() { section "controlled lane registration metadata" - local registration found=0 mode size + local registration found=0 mode size service_user_readable=0 for registration in 
$CD_LANE_DRAIN_REGISTRATION_PATHS; do if [ -f "$registration" ] && [ -s "$registration" ]; then + service_user_readable=0 + if service_user_test -r "$registration"; then + service_user_readable=1 + else + blocker "controlled_cd_lane_registration_not_readable_by_service_user:${CD_LANE_DRAIN_USER}" + fi mode="$(stat -c '%a' "$registration" 2>/dev/null || stat -f '%Lp' "$registration" 2>/dev/null || echo unknown)" size="$(stat -c '%s' "$registration" 2>/dev/null || stat -f '%z' "$registration" 2>/dev/null || echo unknown)" - printf 'CD_LANE_REGISTRATION path=%s present=1 size_bytes=%s mode=%s content_read=false\n' "$registration" "$size" "$mode" - found=1 + printf 'CD_LANE_REGISTRATION path=%s present=1 size_bytes=%s mode=%s service_user=%s service_user_readable=%s content_read=false\n' \ + "$registration" "$size" "$mode" "$CD_LANE_DRAIN_USER" "$service_user_readable" + if [ "$service_user_readable" -eq 1 ]; then + found=1 + fi else printf 'CD_LANE_REGISTRATION path=%s present=0 content_read=false\n' "$registration" fi @@ -333,6 +377,9 @@ check_service() { fi printf 'CD_LANE_SERVICE_GUARDRAILS unit=%s active=%s main_pid=%s limits=%s target_match=%s registration_condition=%s\n' \ "$CD_LANE_DRAIN_SERVICE" "${active:-unknown}" "${mainpid:-0}" "$limits_ok" "$target_ok" "$condition_ok" + if [ "$limits_ok" -eq 1 ] && [ "$target_ok" -eq 1 ] && [ "$condition_ok" -eq 1 ]; then + SERVICE_GUARDRAILS_READY=1 + fi if [ "$REQUIRE_ACTIVE_SERVICE" = "1" ]; then if [ "${active:-}" != "active" ] || ! 
printf '%s' "${mainpid:-0}" | grep -Eq '^[1-9][0-9]*$'; then blocker "controlled_cd_lane_service_not_active" @@ -407,6 +454,7 @@ print_verdict() { printf 'BINARY_READY=%s\n' "$BINARY_READY" printf 'REGISTRATION_READY=%s\n' "$REGISTRATION_READY" printf 'SERVICE_READY=%s\n' "$SERVICE_READY" + printf 'SERVICE_GUARDRAILS_READY=%s\n' "$SERVICE_GUARDRAILS_READY" printf 'LEGACY_FAILCLOSED=%s\n' "$LEGACY_FAILCLOSED" printf 'PRIMARY_LANE_FAILCLOSED=%s\n' "$PRIMARY_LANE_FAILCLOSED" printf 'WARNING_COUNT=%s\n' "${#WARNINGS[@]}" @@ -417,7 +465,9 @@ print_verdict() { return 0 fi printf 'AWOOOI_110_CONTROLLED_CD_LANE_READY=0\n' - if [ "$REGISTRATION_READY" -eq 0 ]; then + if [ "$CONFIG_READY" -eq 0 ] || [ "$BINARY_READY" -eq 0 ] || [ "$SERVICE_GUARDRAILS_READY" -eq 0 ]; then + printf 'safe_next_step=restore_awoooi_cd_lane_drain_staging_artifacts_then_rerun_this_verifier\n' + elif [ "$REGISTRATION_READY" -eq 0 ]; then printf 'safe_next_step=restore_or_register_awoooi_cd_lane_drain_registration_without_printing_token_then_rerun_this_verifier\n' elif [ "$ROOT_RESTORE_LEFT" != "0" ]; then printf 'safe_next_step=quarantine_cd_lane_root_restore_sources_then_rerun_this_verifier\n' diff --git a/ops/runner/register-awoooi-110-controlled-cd-lane-drain.sh b/ops/runner/register-awoooi-110-controlled-cd-lane-drain.sh new file mode 100755 index 00000000..f5018d3d --- /dev/null +++ b/ops/runner/register-awoooi-110-controlled-cd-lane-drain.sh @@ -0,0 +1,215 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Register the AWOOOI 110 controlled CD drain lane without putting the runner +# token in shell history, argv, environment, logs, or command output. Codex must +# not run the token entry path; it is for an interactive TTY on 110. 
+ +GITEA_INSTANCE="${GITEA_INSTANCE:-http://192.168.0.110:3001}" +RUNNER_NAME="${RUNNER_NAME:-awoooi-110-cd-lane-drain}" +RUNNER_LABELS="${RUNNER_LABELS:-awoooi-host:host,awoooi-ubuntu:docker://192.168.0.110:5000/awoooi/ci-runner:act-22.04}" +RUNNER_DIR="${RUNNER_DIR:-/home/wooo/awoooi-cd-lane-drain}" +RUNNER_WORKDIR="${RUNNER_WORKDIR:-${RUNNER_DIR}/data}" +RUNNER_BINARY="${RUNNER_BINARY:-${RUNNER_DIR}/awoooi_cd_lane_controlled}" +RUNNER_CONFIG="${RUNNER_CONFIG:-${RUNNER_DIR}/config.yaml}" +RUNNER_REGISTRATION="${RUNNER_REGISTRATION:-${RUNNER_WORKDIR}/.runner}" +SERVICE_NAME="${SERVICE_NAME:-awoooi-cd-lane-drain.service}" +READINESS_VERIFIER="${READINESS_VERIFIER:-/usr/local/bin/check-awoooi-110-controlled-cd-lane-readiness.sh}" +DRY_RUN=0 +ENABLE_SERVICE_AFTER_REGISTRATION=0 + +usage() { + cat <<'EOF' +Usage: + ops/runner/register-awoooi-110-controlled-cd-lane-drain.sh --check + ops/runner/register-awoooi-110-controlled-cd-lane-drain.sh + ops/runner/register-awoooi-110-controlled-cd-lane-drain.sh --enable-service + +Remote registration: + Copy this script to 110 first, then run it from ssh -tt on the target host. + Do not pipe the script over stdin for the real registration path, because + token entry needs the interactive TTY. 
+ +Environment overrides: + GITEA_INSTANCE, RUNNER_NAME, RUNNER_LABELS, RUNNER_DIR, RUNNER_WORKDIR, + RUNNER_BINARY, RUNNER_CONFIG, RUNNER_REGISTRATION, SERVICE_NAME, + READINESS_VERIFIER + +Safety contract: + - requires an interactive TTY for token entry + - reads the runner token with terminal echo disabled + - refuses RUNNER_TOKEN from the environment + - feeds the token to act_runner over stdin, not argv + - never prints the token or .runner content + - refuses to overwrite an existing non-empty registration unless + ALLOW_REREGISTER=1 is set + - only starts the service when --enable-service is explicit and the + non-secret readiness verifier passes with REQUIRE_ACTIVE_SERVICE=0 +EOF +} + +log() { + printf '%s\n' "$*" +} + +die() { + printf 'ERROR %s\n' "$*" >&2 + exit 1 +} + +metadata() { + log "secret_values_collected_by_codex=false" + log "runner_token_echoed=false" + log "runner_token_in_argv=false" + log "raw_runner_registration_read=false" + log "gitea_instance=${GITEA_INSTANCE}" + log "runner_name=${RUNNER_NAME}" + log "runner_labels=${RUNNER_LABELS}" + log "runner_dir=${RUNNER_DIR}" + log "runner_workdir=${RUNNER_WORKDIR}" + log "runner_binary=${RUNNER_BINARY}" + log "runner_config=${RUNNER_CONFIG}" + log "runner_registration=${RUNNER_REGISTRATION}" + log "service_name=${SERVICE_NAME}" + log "readiness_verifier=${READINESS_VERIFIER}" + log "enable_service_after_registration=${ENABLE_SERVICE_AFTER_REGISTRATION}" +} + +preflight() { + [ -d "$RUNNER_DIR" ] || die "runner_dir_missing:${RUNNER_DIR}" + [ -d "$RUNNER_WORKDIR" ] || die "runner_workdir_missing:${RUNNER_WORKDIR}" + [ -x "$RUNNER_BINARY" ] || die "runner_binary_missing_or_not_executable:${RUNNER_BINARY}" + [ -r "$RUNNER_CONFIG" ] || die "runner_config_missing_or_not_readable:${RUNNER_CONFIG}" + + if [ -s "$RUNNER_REGISTRATION" ] && [ "${ALLOW_REREGISTER:-0}" != "1" ]; then + log "runner_registration_present=1" + log "runner_registration_content_read=false" + if [ 
"$ENABLE_SERVICE_AFTER_REGISTRATION" = "1" ]; then + if [ "$DRY_RUN" = "1" ]; then + log "dry_run=true" + log "safe_next_step=enable_service_after_registration_with_verifier" + exit 0 + fi + enable_service + exit $? + fi + log "safe_next_step=enable_awoooi_cd_lane_drain_service_and_rerun_110_readiness_verifier" + exit 0 + fi +} + +require_tty() { + if [ -t 0 ]; then + return 0 + fi + { : </dev/tty; } 2>/dev/null || die "interactive_tty_required" +} + +read_token_from_tty() { + local prompt="Gitea runner token: " + set +x + HISTFILE=/dev/null + export HISTFILE + if { : </dev/tty; } 2>/dev/null; then + IFS= read -r -s -p "$prompt" RUNNER_TOKEN </dev/tty + printf '\n' >/dev/tty + else + IFS= read -r -s -p "$prompt" RUNNER_TOKEN + printf '\n' + fi + [ -n "${RUNNER_TOKEN:-}" ] || die "runner_token_empty" +} + +register_runner() { + cd "$RUNNER_WORKDIR" + set +x + printf '%s\n' "$RUNNER_TOKEN" | "$RUNNER_BINARY" register \ + --instance "$GITEA_INSTANCE" \ + --name "$RUNNER_NAME" \ + --labels "$RUNNER_LABELS" \ + --config "$RUNNER_CONFIG" + unset RUNNER_TOKEN +} + +postcheck_registration() { + [ -s "$RUNNER_REGISTRATION" ] || die "runner_registration_not_created:${RUNNER_REGISTRATION}" + chmod go-rwx "$RUNNER_REGISTRATION" 2>/dev/null || true + log "runner_registration_present=1" + log "runner_registration_content_read=false" +} + +verifier_available() { + [ -x "$READINESS_VERIFIER" ] +} + +pre_enable_guard() { + if ! 
verifier_available; then + die "readiness_verifier_missing_or_not_executable:${READINESS_VERIFIER}" + fi + log "pre_enable_verifier_start=1" + REQUIRE_ACTIVE_SERVICE=0 "$READINESS_VERIFIER" +} + +enable_service() { + pre_enable_guard + log "service_enable_requested=1" + sudo -n systemctl daemon-reload + sudo -n systemctl enable --now "$SERVICE_NAME" + log "service_enable_attempted=1" + if verifier_available; then + "$READINESS_VERIFIER" + else + log "safe_next_step=rerun_110_controlled_cd_lane_readiness_verifier" + fi +} + +main() { + while [ "$#" -gt 0 ]; do + case "$1" in + --help|-h) + usage + exit 0 + ;; + --check|--dry-run) + DRY_RUN=1 + ;; + --enable-service) + ENABLE_SERVICE_AFTER_REGISTRATION=1 + ;; + *) + usage >&2 + exit 2 + ;; + esac + shift + done + + umask 077 + trap 'unset RUNNER_TOKEN 2>/dev/null || true' EXIT + + if [ "${RUNNER_TOKEN+x}" = "x" ]; then + die "runner_token_env_not_allowed" + fi + + metadata + preflight + + if [ "$DRY_RUN" = "1" ]; then + log "dry_run=true" + log "safe_next_step=run_this_script_from_interactive_tty_without_capturing_token" + exit 0 + fi + + require_tty + read_token_from_tty + register_runner + postcheck_registration + + if [ "$ENABLE_SERVICE_AFTER_REGISTRATION" = "1" ]; then + enable_service + else + log "safe_next_step=enable_awoooi_cd_lane_drain_service_and_rerun_110_readiness_verifier" + fi +} + +main "$@" diff --git a/ops/runner/test_cd_controlled_runtime_profile.py b/ops/runner/test_cd_controlled_runtime_profile.py index 632073d9..70805625 100644 --- a/ops/runner/test_cd_controlled_runtime_profile.py +++ b/ops/runner/test_cd_controlled_runtime_profile.py @@ -371,12 +371,16 @@ def test_non110_runner_keepalive_sources_stay_on_controlled_runtime_profile() -> expected_sources = [ "ops/runner/check-awoooi-non110-runner-readiness.sh)", "ops/runner/install-awoooi-non110-runner-user-service.sh)", + "ops/runner/register-awoooi-110-controlled-cd-lane-drain.sh)", "ops/runner/test_check_awoooi_non110_runner_readiness.py)", 
"ops/runner/test_install_awoooi_non110_runner_user_service.py)", + "ops/runner/test_register_awoooi_110_controlled_cd_lane_drain.py)", "../../ops/runner/check-awoooi-non110-runner-readiness.sh", "../../ops/runner/install-awoooi-non110-runner-user-service.sh", + "../../ops/runner/register-awoooi-110-controlled-cd-lane-drain.sh", "../../ops/runner/test_check_awoooi_non110_runner_readiness.py", "../../ops/runner/test_install_awoooi_non110_runner_user_service.py", + "../../ops/runner/test_register_awoooi_110_controlled_cd_lane_drain.py", ] for source in expected_sources: assert source in text @@ -755,7 +759,9 @@ def test_post_start_recovery_verifiers_stay_on_controlled_runtime_profile() -> N "ops/reboot-recovery/full-stack-cold-start-baseline.yml)", "ops/runner/awoooi-cd-lane-drain.service)", "ops/runner/check-awoooi-110-controlled-cd-lane-readiness.sh)", + "ops/runner/register-awoooi-110-controlled-cd-lane-drain.sh)", "ops/runner/test_check_awoooi_110_controlled_cd_lane_readiness.py)", + "ops/runner/test_register_awoooi_110_controlled_cd_lane_drain.py)", "scripts/backup/gitea-repo-bundle-backup.sh)", "scripts/ops/backup-health-textfile-exporter.py)", "scripts/ops/docker-disk-pressure-retention-cleanup.py)", @@ -795,6 +801,7 @@ def test_post_start_recovery_verifiers_stay_on_controlled_runtime_profile() -> N "../../ops/monitoring/alerts.yml", "../../ops/reboot-recovery/full-stack-cold-start-baseline.yml", "../../ops/runner/check-awoooi-110-controlled-cd-lane-readiness.sh", + "../../ops/runner/register-awoooi-110-controlled-cd-lane-drain.sh", "../../scripts/reboot-recovery/deploy-to-110.sh", "../../scripts/reboot-recovery/enforce-110-runner-failclosed.sh", "../../scripts/reboot-recovery/recover-110-control-path-and-harbor-local.sh", @@ -823,6 +830,7 @@ def test_post_start_recovery_verifiers_stay_on_controlled_runtime_profile() -> N "../../scripts/reboot-recovery/tests/test_recover_110_control_path_and_harbor_local.py", 
"../../scripts/reboot-recovery/tests/test_momo_source_arrival_gate.py", "../../ops/runner/test_check_awoooi_110_controlled_cd_lane_readiness.py", + "../../ops/runner/test_register_awoooi_110_controlled_cd_lane_drain.py", ] for source in expected_sources: assert source in text diff --git a/ops/runner/test_check_awoooi_110_controlled_cd_lane_readiness.py b/ops/runner/test_check_awoooi_110_controlled_cd_lane_readiness.py index 2df31b0b..1374fe66 100644 --- a/ops/runner/test_check_awoooi_110_controlled_cd_lane_readiness.py +++ b/ops/runner/test_check_awoooi_110_controlled_cd_lane_readiness.py @@ -218,9 +218,13 @@ def test_110_controlled_cd_lane_ready_without_printing_registration_content( assert result.returncode == 0, result.stdout + result.stderr assert "AWOOOI_110_CONTROLLED_CD_LANE_READY=1" in result.stdout + assert "SERVICE_GUARDRAILS_READY=1" in result.stdout assert "runner_token_read=false" in result.stdout assert "raw_runner_registration_read=false" in result.stdout assert "content_read=false" in result.stdout + assert "service_user=wooo" in result.stdout + assert "service_user_readable=1" in result.stdout + assert "service_user_executable=1" in result.stdout assert "secret-token-like-content" not in result.stdout assert "CD_LANE_SERVICE_GUARDRAILS" in result.stdout assert "registration_condition=1" in result.stdout @@ -256,12 +260,46 @@ def test_110_controlled_cd_lane_requires_registration_condition(tmp_path: Path) assert result.returncode == 1 assert "BLOCKER controlled_cd_lane_service_registration_condition_missing" in result.stdout + assert "SERVICE_GUARDRAILS_READY=0" in result.stdout assert "AWOOOI_110_CONTROLLED_CD_LANE_READY=0" in result.stdout +def test_110_controlled_cd_lane_prioritizes_staging_before_registration( + tmp_path: Path, +) -> None: + result = _run_verifier( + tmp_path, + registration_present=False, + include_registration_condition=False, + ) + + assert result.returncode == 1 + assert "BLOCKER controlled_cd_lane_registration_missing" in 
result.stdout + assert "BLOCKER controlled_cd_lane_service_registration_condition_missing" in result.stdout + assert "SERVICE_GUARDRAILS_READY=0" in result.stdout + assert ( + "safe_next_step=restore_awoooi_cd_lane_drain_staging_artifacts_then_rerun_this_verifier" + in result.stdout + ) + + def test_110_controlled_cd_lane_blocks_active_legacy_runner(tmp_path: Path) -> None: result = _run_verifier(tmp_path, legacy_active=True) assert result.returncode == 1 assert "BLOCKER legacy_runner_unit_not_failclosed:gitea-act-runner-host.service" in result.stdout assert "AWOOOI_110_CONTROLLED_CD_LANE_READY=0" in result.stdout + + +def test_110_controlled_cd_lane_verifier_checks_service_user_access() -> None: + text = VERIFIER.read_text(encoding="utf-8") + + assert "CD_LANE_DRAIN_USER" in text + assert "service_user_test -r \"$CD_LANE_DRAIN_CONFIG\"" in text + assert "service_user_test -x \"$CD_LANE_DRAIN_BINARY\"" in text + assert "service_user_test -r \"$registration\"" in text + assert "SERVICE_GUARDRAILS_READY" in text + assert "restore_awoooi_cd_lane_drain_staging_artifacts_then_rerun_this_verifier" in text + assert "controlled_cd_lane_config_not_readable_by_service_user" in text + assert "controlled_cd_lane_binary_not_executable_by_service_user" in text + assert "controlled_cd_lane_registration_not_readable_by_service_user" in text diff --git a/ops/runner/test_register_awoooi_110_controlled_cd_lane_drain.py b/ops/runner/test_register_awoooi_110_controlled_cd_lane_drain.py new file mode 100644 index 00000000..29836453 --- /dev/null +++ b/ops/runner/test_register_awoooi_110_controlled_cd_lane_drain.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import os +import subprocess +from pathlib import Path + + +ROOT = Path(__file__).resolve().parents[2] +REGISTER = ROOT / "ops/runner/register-awoooi-110-controlled-cd-lane-drain.sh" + + +def _make_lane(tmp_path: Path) -> dict[str, str]: + lane_dir = tmp_path / "awoooi-cd-lane-drain" + workdir = 
lane_dir / "data" + workdir.mkdir(parents=True) + binary = lane_dir / "awoooi_cd_lane_controlled" + binary.write_text("#!/usr/bin/env bash\nexit 99\n", encoding="utf-8") + binary.chmod(0o755) + config = lane_dir / "config.yaml" + config.write_text( + "\n".join( + [ + "runner:", + " capacity: 1", + " labels:", + ' - "awoooi-host:host"', + ' - "awoooi-ubuntu:docker://192.168.0.110:5000/awoooi/ci-runner:act-22.04"', + "", + ] + ), + encoding="utf-8", + ) + return { + "RUNNER_DIR": str(lane_dir), + "RUNNER_WORKDIR": str(workdir), + "RUNNER_BINARY": str(binary), + "RUNNER_CONFIG": str(config), + "RUNNER_REGISTRATION": str(workdir / ".runner"), + "READINESS_VERIFIER": str(tmp_path / "missing-verifier"), + } + + +def test_110_register_helper_has_no_token_argv_path() -> None: + text = REGISTER.read_text(encoding="utf-8") + assert "--token" not in text + assert "runner_token_env_not_allowed" in text + assert "runner_token_in_argv=false" in text + assert "runner_token_echoed=false" in text + assert "raw_runner_registration_read=false" in text + assert "Do not pipe the script over stdin" in text + assert "REQUIRE_ACTIVE_SERVICE=0" in text + + +def test_110_register_helper_rejects_runner_token_env(tmp_path: Path) -> None: + env = {**os.environ, **_make_lane(tmp_path), "RUNNER_TOKEN": "fake-token-must-not-leak"} + + result = subprocess.run( + ["bash", str(REGISTER), "--check"], + check=False, + env=env, + text=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + + assert result.returncode != 0 + assert "runner_token_env_not_allowed" in result.stderr + assert "fake-token-must-not-leak" not in result.stdout + assert "fake-token-must-not-leak" not in result.stderr + + +def test_110_register_helper_dry_run_requires_no_token(tmp_path: Path) -> None: + result = subprocess.run( + ["bash", str(REGISTER), "--check"], + check=False, + env={**os.environ, **_make_lane(tmp_path)}, + text=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + + assert result.returncode == 
0 + assert "dry_run=true" in result.stdout + assert "safe_next_step=run_this_script_from_interactive_tty_without_capturing_token" in result.stdout + assert "runner_token_in_argv=false" in result.stdout + assert "--token" not in result.stdout + + +def test_110_register_helper_existing_registration_does_not_read_content(tmp_path: Path) -> None: + env = {**os.environ, **_make_lane(tmp_path)} + Path(env["RUNNER_REGISTRATION"]).write_text("secret-like-registration-content\n", encoding="utf-8") + + result = subprocess.run( + ["bash", str(REGISTER), "--check"], + check=False, + env=env, + text=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + + assert result.returncode == 0 + assert "runner_registration_present=1" in result.stdout + assert "runner_registration_content_read=false" in result.stdout + assert "secret-like-registration-content" not in result.stdout + assert "secret-like-registration-content" not in result.stderr + assert "safe_next_step=enable_awoooi_cd_lane_drain_service_and_rerun_110_readiness_verifier" in result.stdout + + +def test_110_register_helper_enable_service_dry_run_does_not_call_systemctl(tmp_path: Path) -> None: + fake_bin = tmp_path / "bin" + fake_bin.mkdir() + systemctl = fake_bin / "systemctl" + systemctl.write_text( + "#!/usr/bin/env bash\nprintf 'systemctl_must_not_run\\n' >&2\nexit 44\n", + encoding="utf-8", + ) + systemctl.chmod(0o755) + env = { + **os.environ, + **_make_lane(tmp_path), + "PATH": f"{fake_bin}:{os.environ['PATH']}", + } + Path(env["RUNNER_REGISTRATION"]).write_text("secret-like-registration-content\n", encoding="utf-8") + + result = subprocess.run( + ["bash", str(REGISTER), "--check", "--enable-service"], + check=False, + env=env, + text=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + + assert result.returncode == 0 + assert "dry_run=true" in result.stdout + assert "safe_next_step=enable_service_after_registration_with_verifier" in result.stdout + assert "systemctl_must_not_run" not in 
result.stderr