fix(recovery): include 110 controlled lane verifier
Some checks failed
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / build-and-deploy (push) Has been cancelled
CD Pipeline / post-deploy-checks (push) Has been cancelled
CD Pipeline / tests (push) Has been cancelled
Some checks failed
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / build-and-deploy (push) Has been cancelled
CD Pipeline / post-deploy-checks (push) Has been cancelled
CD Pipeline / tests (push) Has been cancelled
This commit is contained in:
@@ -50672,6 +50672,7 @@ production browser smoke:
|
||||
- verifier 僅讀 metadata,不讀 `.runner` 內容、不印 runner token;檢查 110 host selector、controlled drain lane `capacity=1`、`awoooi-host:host` / `awoooi-ubuntu` labels、ELF binary、registration metadata 存在、systemd CPU / memory / tasks / `NoNewPrivileges` guardrails、legacy runner fail-closed、root restore-source left `0`、active action container / heavy process / load 壓力。
|
||||
- `awoooi-cd-lane-drain.service` 與 `awoooi-startup-110.sh` 產生的 controlled drain unit 新增 `ConditionPathExists=/home/wooo/awoooi-cd-lane-drain/data/.runner`,避免 service active 但未註冊時假裝可承接 `awoooi-host` queue。
|
||||
- `ops/runner/verify-awoooi-non110-cd-closure.py` 的 Harbor 110 no-matching next action 改為先在 110 跑 `check-awoooi-110-controlled-cd-lane-readiness.sh`,通過後再恢復 `awoooi-host` control path 並重讀 queue/closure。
|
||||
- `recover-110-control-path-and-harbor-local.sh` 的 `--check` 串入 controlled lane verifier;`deploy-to-110.sh` 同步安裝 `/usr/local/bin/check-awoooi-110-controlled-cd-lane-readiness.sh`,讓 110 local console / root shell recovery 包同時涵蓋 SSH metadata、Harbor watchdog 與 `awoooi-host` lane readiness。
|
||||
|
||||
**本地驗證結果**:
|
||||
- `pytest ops/runner/test_check_awoooi_110_controlled_cd_lane_readiness.py ops/runner/test_verify_awoooi_non110_cd_closure.py ops/runner/test_cd_controlled_runtime_profile.py ops/runner/test_guard_gitea_runner_pressure.py scripts/reboot-recovery/tests/test_cold_start_monitor_bounded_probes.py -q`:`56 passed`。
|
||||
|
||||
@@ -171,8 +171,10 @@ def test_deploy_to_110_syncs_local_control_path_recovery_helpers() -> None:
|
||||
|
||||
assert "repair-110-ssh-publickey-auth-local.sh" in text
|
||||
assert "recover-110-control-path-and-harbor-local.sh" in text
|
||||
assert "check-awoooi-110-controlled-cd-lane-readiness.sh" in text
|
||||
assert "/usr/local/bin/repair-110-ssh-publickey-auth-local.sh" in text
|
||||
assert "/usr/local/bin/recover-110-control-path-and-harbor-local.sh" in text
|
||||
assert "/usr/local/bin/check-awoooi-110-controlled-cd-lane-readiness.sh" in text
|
||||
|
||||
|
||||
def test_onboarding_warning_step_template_stays_on_controlled_runtime_profile() -> None:
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
set -euo pipefail
|
||||
HOST="wooo@192.168.0.110"
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
ROOT_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
||||
|
||||
echo "=== 部署 awoooi-startup-110 + harbor-watchdog 到 192.168.0.110 ==="
|
||||
|
||||
@@ -18,6 +19,7 @@ scp "$SCRIPT_DIR/awoooi-startup-110.sh" "$HOST:/tmp/awoooi-startup-110.sh"
|
||||
scp "$SCRIPT_DIR/awoooi-startup-110.service" "$HOST:/tmp/awoooi-startup-110.service"
|
||||
scp "$SCRIPT_DIR/repair-110-ssh-publickey-auth-local.sh" "$HOST:/tmp/repair-110-ssh-publickey-auth-local.sh"
|
||||
scp "$SCRIPT_DIR/recover-110-control-path-and-harbor-local.sh" "$HOST:/tmp/recover-110-control-path-and-harbor-local.sh"
|
||||
scp "$ROOT_DIR/ops/runner/check-awoooi-110-controlled-cd-lane-readiness.sh" "$HOST:/tmp/check-awoooi-110-controlled-cd-lane-readiness.sh"
|
||||
|
||||
# 2. 上傳 watchdog
|
||||
echo "[2/5] 上傳 harbor-watchdog..."
|
||||
@@ -32,6 +34,8 @@ ssh "$HOST" "sudo cp /tmp/awoooi-startup-110.sh /usr/local/bin/awoooi-startup-11
|
||||
sudo chmod +x /usr/local/bin/repair-110-ssh-publickey-auth-local.sh && \
|
||||
sudo cp /tmp/recover-110-control-path-and-harbor-local.sh /usr/local/bin/recover-110-control-path-and-harbor-local.sh && \
|
||||
sudo chmod +x /usr/local/bin/recover-110-control-path-and-harbor-local.sh && \
|
||||
sudo cp /tmp/check-awoooi-110-controlled-cd-lane-readiness.sh /usr/local/bin/check-awoooi-110-controlled-cd-lane-readiness.sh && \
|
||||
sudo chmod +x /usr/local/bin/check-awoooi-110-controlled-cd-lane-readiness.sh && \
|
||||
sudo cp /tmp/awoooi-startup-110.service /etc/systemd/system/awoooi-startup-110.service && \
|
||||
sudo systemctl daemon-reload && \
|
||||
sudo systemctl enable awoooi-startup-110.service && \
|
||||
|
||||
@@ -13,6 +13,7 @@ EXPECTED_HOST_IP="${AWOOOI_110_EXPECTED_HOST_IP:-192.168.0.110}"
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
SSH_REPAIR_SCRIPT="${AWOOOI_110_SSH_REPAIR_SCRIPT:-}"
|
||||
HARBOR_WATCHDOG_SCRIPT="${AWOOOI_HARBOR_WATCHDOG_SCRIPT:-}"
|
||||
CONTROLLED_LANE_VERIFIER_SCRIPT="${AWOOOI_110_CONTROLLED_LANE_VERIFIER_SCRIPT:-}"
|
||||
RELOAD_SSH="${RELOAD_SSH:-0}"
|
||||
|
||||
usage() {
|
||||
@@ -20,7 +21,7 @@ usage() {
|
||||
Usage: recover-110-control-path-and-harbor-local.sh [--check|--apply-ssh-metadata|--repair-harbor-once|--apply-all]
|
||||
|
||||
Modes:
|
||||
--check Read-only checks for SSH metadata and Harbor readiness.
|
||||
--check Read-only checks for SSH metadata, Harbor readiness, and controlled CD lane readiness.
|
||||
--apply-ssh-metadata Fix TARGET_USER home/.ssh/authorized_keys metadata only.
|
||||
--repair-harbor-once Run one bounded Harbor watchdog repair cycle only.
|
||||
--apply-all Apply SSH metadata repair, then one Harbor repair cycle.
|
||||
@@ -120,6 +121,22 @@ resolve_harbor_watchdog_script() {
|
||||
return 1
|
||||
}
|
||||
|
||||
# Print the path of the first usable controlled CD lane readiness verifier.
#
# Candidates are tried in priority order:
#   1. the explicit override held in CONTROLLED_LANE_VERIFIER_SCRIPT,
#   2. the copy installed under /usr/local/bin,
#   3. the in-repo copy located relative to SCRIPT_DIR.
# A candidate is accepted only when it is non-empty and executable.
#
# Outputs: the chosen path on stdout, followed by a newline.
# Returns: 0 when a candidate was found, 1 when none is usable.
resolve_controlled_lane_verifier_script() {
  local candidate
  for candidate in \
    "$CONTROLLED_LANE_VERIFIER_SCRIPT" \
    "/usr/local/bin/check-awoooi-110-controlled-cd-lane-readiness.sh" \
    "$SCRIPT_DIR/../../ops/runner/check-awoooi-110-controlled-cd-lane-readiness.sh"; do
    # An unset override expands to the empty string; skip it explicitly.
    [ -n "$candidate" ] || continue
    if [ -x "$candidate" ]; then
      printf '%s\n' "$candidate"
      return 0
    fi
  done
  return 1
}
|
||||
|
||||
run_ssh_check() {
|
||||
local script
|
||||
if ! script="$(resolve_ssh_repair_script)"; then
|
||||
@@ -158,9 +175,20 @@ run_harbor_repair_once() {
|
||||
"$script" --repair-once
|
||||
}
|
||||
|
||||
# Run the controlled CD lane readiness verifier for the expected host.
#
# The verifier path comes from resolve_controlled_lane_verifier_script and is
# invoked with TARGET_HOST_IP set to EXPECTED_HOST_IP for that call only.
#
# Outputs: "CONTROLLED_LANE_VERIFIER_SCRIPT_STATUS=missing" on stdout when no
#          verifier can be resolved; otherwise whatever the verifier prints.
# Returns: 1 when no verifier is available, else the verifier's exit status.
run_controlled_lane_check() {
  local verifier_path
  verifier_path="$(resolve_controlled_lane_verifier_script)" || {
    echo "CONTROLLED_LANE_VERIFIER_SCRIPT_STATUS=missing"
    return 1
  }
  TARGET_HOST_IP="$EXPECTED_HOST_IP" "$verifier_path"
}
|
||||
|
||||
echo "AWOOOI_110_CONTROL_PATH_AND_HARBOR_LOCAL_RECOVERY mode=${MODE} target_user=${TARGET_USER}"
|
||||
echo "expected_host_ip=${EXPECTED_HOST_IP}"
|
||||
echo "operation_boundary_secret_value_read=false"
|
||||
echo "operation_boundary_runner_token_read=false"
|
||||
echo "operation_boundary_raw_runner_registration_read=false"
|
||||
echo "operation_boundary_host_reboot_performed=false"
|
||||
echo "operation_boundary_docker_daemon_restart_performed=false"
|
||||
echo "operation_boundary_node_drain_performed=false"
|
||||
@@ -169,6 +197,7 @@ case "$MODE" in
|
||||
check)
|
||||
run_ssh_check || true
|
||||
run_harbor_check || true
|
||||
run_controlled_lane_check || true
|
||||
;;
|
||||
apply_ssh_metadata)
|
||||
run_ssh_apply
|
||||
@@ -179,6 +208,7 @@ case "$MODE" in
|
||||
apply_all)
|
||||
run_ssh_apply
|
||||
run_harbor_repair_once
|
||||
run_controlled_lane_check || true
|
||||
;;
|
||||
*)
|
||||
echo "Unknown internal mode: $MODE" >&2
|
||||
|
||||
@@ -18,11 +18,15 @@ def test_recover_110_orchestrator_contracts() -> None:
|
||||
assert "--repair-harbor-once" in text
|
||||
assert "--apply-all" in text
|
||||
assert "operation_boundary_secret_value_read=false" in text
|
||||
assert "operation_boundary_runner_token_read=false" in text
|
||||
assert "operation_boundary_raw_runner_registration_read=false" in text
|
||||
assert "operation_boundary_host_reboot_performed=false" in text
|
||||
assert "operation_boundary_docker_daemon_restart_performed=false" in text
|
||||
assert "repair-110-ssh-publickey-auth-local.sh" in text
|
||||
assert "harbor-watchdog.sh" in text
|
||||
assert "check-awoooi-110-controlled-cd-lane-readiness.sh" in text
|
||||
assert "cat \"$home_dir/.ssh/authorized_keys\"" not in text
|
||||
assert "cat \"$CD_LANE_DRAIN_DIR/data/.runner\"" not in text
|
||||
|
||||
forbidden = [
|
||||
"systemctl restart docker",
|
||||
@@ -41,6 +45,7 @@ def test_recover_110_orchestrator_contracts() -> None:
|
||||
def test_recover_110_check_uses_fake_helpers_without_writes(tmp_path: Path) -> None:
|
||||
ssh_helper = tmp_path / "ssh-helper.sh"
|
||||
harbor_helper = tmp_path / "harbor-helper.sh"
|
||||
lane_helper = tmp_path / "lane-helper.sh"
|
||||
ssh_helper.write_text(
|
||||
"#!/usr/bin/env bash\n"
|
||||
"echo SSH_HELPER_MODE=$1\n"
|
||||
@@ -53,7 +58,13 @@ def test_recover_110_check_uses_fake_helpers_without_writes(tmp_path: Path) -> N
|
||||
"echo HARBOR_RUNTIME_WRITE=false\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
for helper in (ssh_helper, harbor_helper):
|
||||
lane_helper.write_text(
|
||||
"#!/usr/bin/env bash\n"
|
||||
"echo LANE_VERIFIER_MODE=check\n"
|
||||
"echo LANE_VERIFIER_REGISTRATION_CONTENT_READ=false\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
for helper in (ssh_helper, harbor_helper, lane_helper):
|
||||
helper.chmod(helper.stat().st_mode | stat.S_IXUSR)
|
||||
|
||||
env = {
|
||||
@@ -61,6 +72,7 @@ def test_recover_110_check_uses_fake_helpers_without_writes(tmp_path: Path) -> N
|
||||
"ALLOW_NON_110": "1",
|
||||
"AWOOOI_110_SSH_REPAIR_SCRIPT": str(ssh_helper),
|
||||
"AWOOOI_HARBOR_WATCHDOG_SCRIPT": str(harbor_helper),
|
||||
"AWOOOI_110_CONTROLLED_LANE_VERIFIER_SCRIPT": str(lane_helper),
|
||||
}
|
||||
result = subprocess.run(
|
||||
["bash", str(RECOVERY), "--check"],
|
||||
@@ -75,6 +87,9 @@ def test_recover_110_check_uses_fake_helpers_without_writes(tmp_path: Path) -> N
|
||||
assert "AWOOOI_110_CONTROL_PATH_AND_HARBOR_LOCAL_RECOVERY mode=check" in result.stdout
|
||||
assert "SSH_HELPER_MODE=--check" in result.stdout
|
||||
assert "HARBOR_HELPER_MODE=--check" in result.stdout
|
||||
assert "LANE_VERIFIER_MODE=check" in result.stdout
|
||||
assert "SSH_METADATA_WRITE=false" in result.stdout
|
||||
assert "HARBOR_RUNTIME_WRITE=false" in result.stdout
|
||||
assert "LANE_VERIFIER_REGISTRATION_CONTENT_READ=false" in result.stdout
|
||||
assert "operation_boundary_secret_value_read=false" in result.stdout
|
||||
assert "operation_boundary_raw_runner_registration_read=false" in result.stdout
|
||||
|
||||
Reference in New Issue
Block a user