diff --git a/docs/LOGBOOK.md b/docs/LOGBOOK.md
index 60827ab3..48d7d5ea 100644
--- a/docs/LOGBOOK.md
+++ b/docs/LOGBOOK.md
@@ -13,6 +13,21 @@
 
 **邊界**:本輪只讀 node exporter / cAdvisor / Prometheus / public routes / bounded SSH;未讀 secret / token / `.env` / raw sessions / SQLite / auth;未讀 `.runner` 內容;未使用 GitHub / `gh` / GitHub API;未 workflow_dispatch;未重啟主機、未 restart Docker / Nginx / K3s / DB / firewall。
 
+## 2026-07-01 — 12:38 110 local recovery package AI Loop alias 實作
+
+**照主線修正的問題**:
+- Post-push CD `#4225` 已觸發 `a550adb55`,但仍因 Harbor public `/v2/` = `502` 失敗;Harbor repair lane 的 live blocker 仍是 `harbor_110_remote_ssh_publickey_auth_stalled`。
+- AI Loop sample / priority work order 指向 `recover-110-control-path-and-harbor-local.sh --apply-ssh-control-path`,但 recovery orchestrator 原本只支援 `--apply-ssh-metadata`;這會讓 AI Agent next action 指到不存在的 mode。
+- `scripts/reboot-recovery/recover-110-control-path-and-harbor-local.sh` 新增 `--apply-ssh-control-path` 受控 alias,canonical mode 仍是 `--apply-ssh-metadata`,只執行 SSH metadata helper,不執行 Harbor repair;輸出 `ssh_publickey_auth_stall_recovery_supported=true`、`ssh_control_path_apply_alias_supported=true`、`operation_boundary_authorized_keys_content_printed=false`、`operation_boundary_ssh_key_material_created=false` 等 receipt 欄位。
+- `scripts/reboot-recovery/tests/test_recover_110_control_path_and_harbor_local.py` 新增 alias 測試,固定 AI Loop 呼叫此 mode 時只跑 `--apply` SSH helper,不跑 Harbor / lane helper。
+
+**驗證**:
+- `python3.11 -m pytest scripts/reboot-recovery/tests/test_recover_110_control_path_and_harbor_local.py -q`:`3 passed`。
+- `python3.11 -m pytest ops/runner/test_cd_controlled_runtime_profile.py -q`:`35 passed`。
+- `git diff --check`:通過。
+
+**邊界**:只改 110 local recovery orchestrator、測試與 LOGBOOK;未讀 secret / token / `.env` / raw sessions / SQLite / auth;未讀 authorized_keys 內容或 `.runner` 內容;未使用 GitHub / `gh` / GitHub API;未 workflow_dispatch;未重啟主機、未 restart Docker / Nginx / K3s / DB / firewall;未執行 110 runtime apply。
+
 ## 2026-07-01 — 12:33 AI Loop LOG writeback SSH auth-stall blocker 細分
 
 **照主線修正的問題**:
diff --git a/scripts/reboot-recovery/recover-110-control-path-and-harbor-local.sh b/scripts/reboot-recovery/recover-110-control-path-and-harbor-local.sh
index 4e1293e8..1c0d7c6f 100644
--- a/scripts/reboot-recovery/recover-110-control-path-and-harbor-local.sh
+++ b/scripts/reboot-recovery/recover-110-control-path-and-harbor-local.sh
@@ -8,6 +8,7 @@
 set -euo pipefail
 
 MODE="check"
+REQUESTED_MODE="--check"
 TARGET_USER="${TARGET_USER:-wooo}"
 EXPECTED_HOST_IP="${AWOOOI_110_EXPECTED_HOST_IP:-192.168.0.110}"
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
@@ -18,11 +19,13 @@ RELOAD_SSH="${RELOAD_SSH:-0}"
 
 usage() {
   cat <<'USAGE'
-Usage: recover-110-control-path-and-harbor-local.sh [--check|--apply-ssh-metadata|--repair-harbor-once|--apply-all]
+Usage: recover-110-control-path-and-harbor-local.sh [--check|--apply-ssh-metadata|--apply-ssh-control-path|--repair-harbor-once|--apply-all]
 
 Modes:
   --check Read-only checks for SSH metadata, Harbor readiness, and controlled CD lane readiness.
   --apply-ssh-metadata Fix TARGET_USER home/.ssh/authorized_keys metadata only.
+  --apply-ssh-control-path
+      AI Loop alias for --apply-ssh-metadata.
   --repair-harbor-once Run one bounded Harbor watchdog repair cycle only.
   --apply-all Apply SSH metadata repair, then one Harbor repair cycle.
 
@@ -38,18 +41,22 @@ USAGE
 
 while [ "$#" -gt 0 ]; do
   case "$1" in
-    --check)
-      MODE="check"
-      ;;
-    --apply-ssh-metadata)
-      MODE="apply_ssh_metadata"
-      ;;
-    --repair-harbor-once)
-      MODE="repair_harbor_once"
-      ;;
-    --apply-all)
-      MODE="apply_all"
-      ;;
+      --check)
+        MODE="check"
+        REQUESTED_MODE="$1"
+        ;;
+      --apply-ssh-metadata|--apply-ssh-control-path)
+        MODE="apply_ssh_metadata"
+        REQUESTED_MODE="$1"
+        ;;
+      --repair-harbor-once)
+        MODE="repair_harbor_once"
+        REQUESTED_MODE="$1"
+        ;;
+      --apply-all)
+        MODE="apply_all"
+        REQUESTED_MODE="$1"
+        ;;
     -h|--help)
       usage
       exit 0
@@ -184,11 +191,18 @@ run_controlled_lane_check() {
   TARGET_HOST_IP="$EXPECTED_HOST_IP" "$script"
 }
 
-echo "AWOOOI_110_CONTROL_PATH_AND_HARBOR_LOCAL_RECOVERY mode=${MODE} target_user=${TARGET_USER}"
+echo "AWOOOI_110_CONTROL_PATH_AND_HARBOR_LOCAL_RECOVERY mode=${MODE} requested_mode=${REQUESTED_MODE} target_user=${TARGET_USER}"
 echo "expected_host_ip=${EXPECTED_HOST_IP}"
+echo "ssh_publickey_auth_stall_recovery_supported=true"
+echo "ssh_control_path_apply_alias_supported=true"
+echo "ssh_control_path_apply_alias=--apply-ssh-control-path"
+echo "ssh_control_path_apply_canonical_mode=--apply-ssh-metadata"
+echo "local_console_required_for_apply=true"
 echo "operation_boundary_secret_value_read=false"
 echo "operation_boundary_runner_token_read=false"
 echo "operation_boundary_raw_runner_registration_read=false"
+echo "operation_boundary_authorized_keys_content_printed=false"
+echo "operation_boundary_ssh_key_material_created=false"
 echo "operation_boundary_host_reboot_performed=false"
 echo "operation_boundary_docker_daemon_restart_performed=false"
 echo "operation_boundary_node_drain_performed=false"
diff --git a/scripts/reboot-recovery/tests/test_recover_110_control_path_and_harbor_local.py b/scripts/reboot-recovery/tests/test_recover_110_control_path_and_harbor_local.py
index 23e56183..f3e99334 100644
--- a/scripts/reboot-recovery/tests/test_recover_110_control_path_and_harbor_local.py
+++ b/scripts/reboot-recovery/tests/test_recover_110_control_path_and_harbor_local.py
@@ -15,11 +15,16 @@ def test_recover_110_orchestrator_contracts() -> None:
 
     assert "--check" in text
     assert "--apply-ssh-metadata" in text
+    assert "--apply-ssh-control-path" in text
     assert "--repair-harbor-once" in text
     assert "--apply-all" in text
     assert "operation_boundary_secret_value_read=false" in text
     assert "operation_boundary_runner_token_read=false" in text
     assert "operation_boundary_raw_runner_registration_read=false" in text
+    assert "operation_boundary_authorized_keys_content_printed=false" in text
+    assert "operation_boundary_ssh_key_material_created=false" in text
+    assert "ssh_publickey_auth_stall_recovery_supported=true" in text
+    assert "ssh_control_path_apply_alias_supported=true" in text
     assert "operation_boundary_host_reboot_performed=false" in text
     assert "operation_boundary_docker_daemon_restart_performed=false" in text
     assert "repair-110-ssh-publickey-auth-local.sh" in text
@@ -85,6 +90,7 @@ def test_recover_110_check_uses_fake_helpers_without_writes(tmp_path: Path) -> N
 
     assert result.returncode == 0, result.stdout + result.stderr
     assert "AWOOOI_110_CONTROL_PATH_AND_HARBOR_LOCAL_RECOVERY mode=check" in result.stdout
+    assert "requested_mode=--check" in result.stdout
     assert "SSH_HELPER_MODE=--check" in result.stdout
     assert "HARBOR_HELPER_MODE=--check" in result.stdout
     assert "LANE_VERIFIER_MODE=check" in result.stdout
@@ -93,3 +99,62 @@ def test_recover_110_check_uses_fake_helpers_without_writes(tmp_path: Path) -> N
     assert "LANE_VERIFIER_REGISTRATION_CONTENT_READ=false" in result.stdout
     assert "operation_boundary_secret_value_read=false" in result.stdout
     assert "operation_boundary_raw_runner_registration_read=false" in result.stdout
+
+
+def test_recover_110_accepts_ai_loop_apply_ssh_control_path_alias(
+    tmp_path: Path,
+) -> None:
+    ssh_helper = tmp_path / "ssh-helper.sh"
+    harbor_helper = tmp_path / "harbor-helper.sh"
+    lane_helper = tmp_path / "lane-helper.sh"
+    ssh_helper.write_text(
+        "#!/usr/bin/env bash\n"
+        "echo SSH_HELPER_MODE=$1\n"
+        "echo SSH_METADATA_WRITE=$([ \"$1\" = \"--apply\" ] && echo true || echo false)\n",
+        encoding="utf-8",
+    )
+    harbor_helper.write_text(
+        "#!/usr/bin/env bash\n"
+        "echo HARBOR_HELPER_SHOULD_NOT_RUN=$1\n"
+        "exit 70\n",
+        encoding="utf-8",
+    )
+    lane_helper.write_text(
+        "#!/usr/bin/env bash\n"
+        "echo LANE_HELPER_SHOULD_NOT_RUN=$1\n"
+        "exit 71\n",
+        encoding="utf-8",
+    )
+    for helper in (ssh_helper, harbor_helper, lane_helper):
+        helper.chmod(helper.stat().st_mode | stat.S_IXUSR)
+
+    env = {
+        **os.environ,
+        "ALLOW_NON_110": "1",
+        "AWOOOI_110_SSH_REPAIR_SCRIPT": str(ssh_helper),
+        "AWOOOI_HARBOR_WATCHDOG_SCRIPT": str(harbor_helper),
+        "AWOOOI_110_CONTROLLED_LANE_VERIFIER_SCRIPT": str(lane_helper),
+    }
+    result = subprocess.run(
+        ["bash", str(RECOVERY), "--apply-ssh-control-path"],
+        check=False,
+        env=env,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        text=True,
+    )
+
+    assert result.returncode == 0, result.stdout + result.stderr
+    assert (
+        "AWOOOI_110_CONTROL_PATH_AND_HARBOR_LOCAL_RECOVERY mode=apply_ssh_metadata"
+        in result.stdout
+    )
+    assert "requested_mode=--apply-ssh-control-path" in result.stdout
+    assert "ssh_control_path_apply_alias_supported=true" in result.stdout
+    assert "ssh_control_path_apply_canonical_mode=--apply-ssh-metadata" in result.stdout
+    assert "SSH_HELPER_MODE=--apply" in result.stdout
+    assert "SSH_METADATA_WRITE=true" in result.stdout
+    assert "HARBOR_HELPER_SHOULD_NOT_RUN" not in result.stdout
+    assert "LANE_HELPER_SHOULD_NOT_RUN" not in result.stdout
+    assert "operation_boundary_authorized_keys_content_printed=false" in result.stdout
+    assert "operation_boundary_ssh_key_material_created=false" in result.stdout