From f96389499511e1625f840f95dae910e626d797f8 Mon Sep 17 00:00:00 2001 From: Your Name Date: Wed, 1 Jul 2026 12:42:34 +0800 Subject: [PATCH] fix(recovery): surface 110 ssh account metadata receipt --- ...or_registry_controlled_recovery_receipt.py | 29 ++++++++++++++- ...or_registry_controlled_recovery_receipt.py | 12 ++++++ docs/LOGBOOK.md | 14 +++++++ .../repair-110-ssh-publickey-auth-local.sh | 37 ++++++++++++++++++- .../test_cold_start_monitor_bounded_probes.py | 9 +++++ 5 files changed, 99 insertions(+), 2 deletions(-) diff --git a/apps/api/src/services/harbor_registry_controlled_recovery_receipt.py b/apps/api/src/services/harbor_registry_controlled_recovery_receipt.py index 1eaa3882..d56d596b 100644 --- a/apps/api/src/services/harbor_registry_controlled_recovery_receipt.py +++ b/apps/api/src/services/harbor_registry_controlled_recovery_receipt.py @@ -940,6 +940,21 @@ def _parse_ssh_local_repair_output(output: str) -> dict[str, Any]: authorized_keys_exists = ( "AUTHORIZED_KEYS_STATUS" in output and "exists=1" in output ) + account_locked = _bool_from_field(fields.get("account_locked")) + shell_executable = _bool_from_field(fields.get("shell_executable")) + sshd_effective_config_available = _bool_from_field(fields.get("available")) + pubkey_authentication = str(fields.get("pubkeyauthentication") or "") + usepam = str(fields.get("usepam") or "") + authorized_keys_file_default = _bool_from_field( + fields.get("authorized_keys_file_default") + ) + account_metadata_ready = bool( + user_exists + and not account_locked + and shell_executable + and pubkey_authentication == "yes" + and authorized_keys_file_default + ) permissions_applied = "APPLIED permissions" in output reload_done = "SSH_RELOAD=done" in output reload_skipped = "SSH_RELOAD=skipped" in output @@ -949,12 +964,24 @@ def _parse_ssh_local_repair_output(output: str) -> dict[str, Any]: "sshd_config_syntax_ok": sshd_ok, "sshd_config_syntax_after_apply_ok": sshd_after_ok, "target_user_exists": user_exists, + "target_user_account_locked": account_locked, + "target_user_shell_executable": shell_executable, + "sshd_effective_config_available": sshd_effective_config_available, + "sshd_pubkeyauthentication": pubkey_authentication, + "sshd_usepam": usepam, + "sshd_authorized_keys_file_default": authorized_keys_file_default, + "account_metadata_ready": account_metadata_ready, "authorized_keys_metadata_present": authorized_keys_exists, "permissions_applied": permissions_applied, "ssh_reload_done": reload_done, "ssh_reload_skipped": reload_skipped, "control_channel_metadata_ready": bool( - marker_seen and sshd_ok and sshd_after_ok and user_exists and authorized_keys_exists + marker_seen + and sshd_ok + and sshd_after_ok + and user_exists + and authorized_keys_exists + and account_metadata_ready ), } diff --git a/apps/api/tests/test_harbor_registry_controlled_recovery_receipt.py b/apps/api/tests/test_harbor_registry_controlled_recovery_receipt.py index a87b588e..865db211 100644 --- a/apps/api/tests/test_harbor_registry_controlled_recovery_receipt.py +++ b/apps/api/tests/test_harbor_registry_controlled_recovery_receipt.py @@ -30,6 +30,16 @@ def test_harbor_recovery_receipt_accepts_verified_repair() -> None: assert payload["readback"]["ssh_local_repair"][ "control_channel_metadata_ready" ] is True + assert payload["readback"]["ssh_local_repair"]["account_metadata_ready"] is True + assert payload["readback"]["ssh_local_repair"]["target_user_account_locked"] is False + assert payload["readback"]["ssh_local_repair"]["target_user_shell_executable"] is True + assert payload["readback"]["ssh_local_repair"]["sshd_pubkeyauthentication"] == "yes" + assert ( + payload["readback"]["ssh_local_repair"][ + "sshd_authorized_keys_file_default" + ] + is True + ) assert payload["readback"]["watchdog_repair"]["harbor_ready"] is True assert payload["readback"]["post_apply_verifier"]["registry_v2_ready"] is True assert payload["readback"]["deploy_marker"]["deploy_marker_verified"] is True @@ -877,9 +887,11 @@ AWOOOI_110_SSH_PUBLICKEY_AUTH_LOCAL_REPAIR mode=apply target_user=wooo SSH_SERVICE_ACTIVE=active SSHD_CONFIG_SYNTAX=ok USER_STATUS user=wooo exists=1 home=/home/wooo +ACCOUNT_METADATA user=wooo passwd_status=P account_locked=false shell=/bin/bash shell_exists=true shell_executable=true PATH_STATUS path=/home/wooo mode=755 owner=wooo group=wooo type=directory PATH_STATUS path=/home/wooo/.ssh mode=700 owner=wooo group=wooo type=directory AUTHORIZED_KEYS_STATUS path=/home/wooo/.ssh/authorized_keys exists=1 bytes=380 lines=1 +SSHD_EFFECTIVE_CONFIG available=true pubkeyauthentication=yes passwordauthentication=no kbdinteractiveauthentication=no usepam=yes maxstartups=10:30:100 authorized_keys_file_default=true APPLIED permissions target_user=wooo home=/home/wooo SSHD_CONFIG_SYNTAX_AFTER_APPLY=ok SSH_RELOAD=skipped diff --git a/docs/LOGBOOK.md b/docs/LOGBOOK.md index 48d7d5ea..dcf12dfa 100644 --- a/docs/LOGBOOK.md +++ b/docs/LOGBOOK.md @@ -1,3 +1,17 @@ +## 2026-07-01 — 12:38 110 SSH local metadata receipt 擴充 + +**照主線修正的問題**: +- 現行 P0 blocker 已收斂為 `harbor_110_remote_ssh_publickey_auth_stalled`:110 TCP / SSH banner 可達,但 `wooo` publickey userauth 等不到 server reply;Harbor `/v2/` 仍為 `502`。 +- `scripts/reboot-recovery/repair-110-ssh-publickey-auth-local.sh --check` 原本只回 user/home/.ssh/authorized_keys metadata;本次補 no-secret account / shell / sshd effective config metadata:`ACCOUNT_METADATA`、`account_locked`、`shell_executable`、`SSHD_EFFECTIVE_CONFIG`、`pubkeyauthentication`、`usepam`、`maxstartups`、`authorized_keys_file_default`。 +- `apps/api/src/services/harbor_registry_controlled_recovery_receipt.py` 同步解析上述欄位,`control_channel_metadata_ready` 現在要求 target user 未鎖、shell 可執行、sshd pubkey auth 開啟、AuthorizedKeysFile 仍指向 `.ssh/authorized_keys`,避免只看到 authorized_keys 存在就誤判 control path ready。 + +**驗證**: +- `DATABASE_URL=... PYTHONPATH=apps/api python3.11 -m pytest apps/api/tests/test_harbor_registry_controlled_recovery_receipt.py scripts/reboot-recovery/tests/test_cold_start_monitor_bounded_probes.py -q`:`27 passed`。 +- `DATABASE_URL=... PYTHONPATH=apps/api python3.11 -m pytest ops/runner/test_read_public_gitea_actions_queue.py ops/runner/test_cd_controlled_runtime_profile.py ops/runner/test_verify_awoooi_non110_cd_closure.py apps/api/tests/test_harbor_registry_controlled_recovery_receipt.py apps/api/tests/test_ai_agent_log_controlled_writeback_executor_readback_api.py apps/api/tests/test_ai_agent_log_controlled_writeback_plan_readback_api.py apps/api/tests/test_ai_agent_autonomous_runtime_control.py apps/api/tests/test_ai_agent_log_intelligence_integration_readback_api.py apps/api/tests/test_awoooi_priority_work_order_readback_api.py -q`:`122 passed`。 +- `py_compile`、`bash -n`、`ops/runner/guard-gitea-runner-pressure.py --root .`、`node scripts/ci/check-gitea-step-env-secrets.js .gitea/workflows/harbor-110-local-repair.yaml .gitea/workflows/cd.yaml`、`git diff --check`:通過。 + +**邊界**:只改 110 local no-secret metadata checker、receipt parser、tests 與 LOGBOOK;未讀 authorized_keys 內容、secret / token / `.env` / raw sessions / SQLite / auth;未讀 `.runner` 內容;未使用 GitHub / `gh` / GitHub API;未 workflow_dispatch;未重啟主機、未 restart Docker / Nginx / K3s / DB / firewall。 + ## 2026-07-01 — 12:35 110 CPU / control-plane live readback **照主線釐清的問題**: diff --git a/scripts/reboot-recovery/repair-110-ssh-publickey-auth-local.sh b/scripts/reboot-recovery/repair-110-ssh-publickey-auth-local.sh index 9de1d823..16c60ebb 100755 --- a/scripts/reboot-recovery/repair-110-ssh-publickey-auth-local.sh +++ b/scripts/reboot-recovery/repair-110-ssh-publickey-auth-local.sh @@ -70,14 +70,26 @@ stat_path() { check_user() { local user="$1" - local home_dir + local home_dir shell passwd_status account_locked shell_exists shell_executable home_dir="$(getent passwd "$user" | awk -F: '{print $6}')" if [ -z "$home_dir" ]; then echo "USER_STATUS user=$user exists=0" return 1 fi + shell="$(getent passwd "$user" | awk -F: '{print $7}')" + passwd_status="$(passwd -S "$user" 2>/dev/null | awk '{print $2}' || true)" + account_locked=false + case "$passwd_status" in + L|LK) account_locked=true ;; + esac + shell_exists=false + shell_executable=false + [ -n "$shell" ] && [ -e "$shell" ] && shell_exists=true + [ -n "$shell" ] && [ -x "$shell" ] && shell_executable=true + echo "USER_STATUS user=$user exists=1 home=$home_dir" + echo "ACCOUNT_METADATA user=$user passwd_status=${passwd_status:-unknown} account_locked=${account_locked} shell=${shell:-unknown} shell_exists=${shell_exists} shell_executable=${shell_executable}" stat_path "$home_dir" stat_path "$home_dir/.ssh" stat_path "$home_dir/.ssh/authorized_keys" @@ -88,6 +100,28 @@ check_user() { fi } +check_sshd_effective_config() { + local user="$1" + local effective pubkey password kbdinteractive usepam maxstartups authorized_keys_file_default + effective="$(sshd -T -C "user=$user,host=localhost,addr=127.0.0.1" 2>/dev/null || true)" + if [ -z "$effective" ]; then + echo "SSHD_EFFECTIVE_CONFIG available=false" + return 0 + fi + + pubkey="$(printf '%s\n' "$effective" | awk '$1 == "pubkeyauthentication" {print $2; exit}')" + password="$(printf '%s\n' "$effective" | awk '$1 == "passwordauthentication" {print $2; exit}')" + kbdinteractive="$(printf '%s\n' "$effective" | awk '$1 == "kbdinteractiveauthentication" {print $2; exit}')" + usepam="$(printf '%s\n' "$effective" | awk '$1 == "usepam" {print $2; exit}')" + maxstartups="$(printf '%s\n' "$effective" | awk '$1 == "maxstartups" {print $2; exit}')" + if printf '%s\n' "$effective" | awk '$1 == "authorizedkeysfile" {$1=""; print}' | grep -q '\.ssh/authorized_keys'; then + authorized_keys_file_default=true + else + authorized_keys_file_default=false + fi + echo "SSHD_EFFECTIVE_CONFIG available=true pubkeyauthentication=${pubkey:-unknown} passwordauthentication=${password:-unknown} kbdinteractiveauthentication=${kbdinteractive:-unknown} usepam=${usepam:-unknown} maxstartups=${maxstartups:-unknown} authorized_keys_file_default=${authorized_keys_file_default}" +} + apply_user_permissions() { local user="$1" local home_dir @@ -118,6 +152,7 @@ systemctl is-active ssh 2>/dev/null | sed 's/^/SSH_SERVICE_ACTIVE=/' || true sshd -t echo "SSHD_CONFIG_SYNTAX=ok" check_user "$TARGET_USER" +check_sshd_effective_config "$TARGET_USER" if [ "$APPLY" -eq 1 ]; then apply_user_permissions "$TARGET_USER" diff --git a/scripts/reboot-recovery/tests/test_cold_start_monitor_bounded_probes.py b/scripts/reboot-recovery/tests/test_cold_start_monitor_bounded_probes.py index d98ad3aa..b9888fa5 100644 --- a/scripts/reboot-recovery/tests/test_cold_start_monitor_bounded_probes.py +++ b/scripts/reboot-recovery/tests/test_cold_start_monitor_bounded_probes.py @@ -138,6 +138,15 @@ def test_110_ssh_publickey_auth_repair_is_local_and_does_not_print_keys() -> Non assert "chmod 600" in text assert "chown \"$user:$user\"" in text assert "sshd -t" in text + assert "ACCOUNT_METADATA user=" in text + assert "passwd -S" in text + assert "account_locked=" in text + assert "shell_executable=" in text + assert "SSHD_EFFECTIVE_CONFIG available=true" in text + assert "sshd -T -C" in text + assert "pubkeyauthentication=" in text + assert "authorized_keys_file_default=" in text assert 'RELOAD_SSH="${RELOAD_SSH:-0}"' in text assert "cat \"$home_dir/.ssh/authorized_keys\"" not in text + assert "getent shadow" not in text assert "echo \"$(cat" not in text