From f1149e560ec5c26fc336a7a89b03dbf0879afedc Mon Sep 17 00:00:00 2001 From: Your Name Date: Wed, 1 Jul 2026 09:54:36 +0800 Subject: [PATCH] fix(runner): preserve harbor ssh probe failure rc --- .gitea/workflows/harbor-110-local-repair.yaml | 3 ++- docs/LOGBOOK.md | 1 + ops/runner/test_cd_controlled_runtime_profile.py | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitea/workflows/harbor-110-local-repair.yaml b/.gitea/workflows/harbor-110-local-repair.yaml index 760e5d7f0..a03082ed9 100644 --- a/.gitea/workflows/harbor-110-local-repair.yaml +++ b/.gitea/workflows/harbor-110-local-repair.yaml @@ -76,8 +76,9 @@ jobs: if timeout 30 "${ssh_base[@]}" "$@"; then echo "harbor_110_remote_ssh_probe_attempt=${attempt} result=success" return 0 + else + rc=$? fi - rc=$? echo "harbor_110_remote_ssh_probe_attempt=${attempt} result=failure rc=${rc}" if [ "${attempt}" -lt "${SSH_PROBE_ATTEMPTS}" ]; then sleep "${SSH_PROBE_SLEEP_SECONDS}" diff --git a/docs/LOGBOOK.md b/docs/LOGBOOK.md index 6bca74414..9246bd082 100644 --- a/docs/LOGBOOK.md +++ b/docs/LOGBOOK.md @@ -4,6 +4,7 @@ - 最新 live truth:CD `#4215` 仍因 Harbor public `/v2/` = `502` 失敗;Harbor repair `#4212` 的具體 blocker 是 `harbor_110_remote_control_channel_unavailable`。 - 188 non-110 runner lane 讀回 ready、host pressure 正常;但 188 → 110 bounded SSH probe 呈現間歇性,一次 `true` 可成功,下一次 `recover-110-control-path-and-harbor-local.sh --check` 又 timeout。 - `.gitea/workflows/harbor-110-local-repair.yaml` 對非寫入的 SSH probe / verifier 加 bounded retry:預設 `6` 次、每次仍受 `ConnectTimeout=8`、`ServerAlive*` 與外層 `timeout 30` 限制,並輸出 `harbor_110_remote_ssh_probe_attempt=...` receipt。`run_recovery --apply-all` 不自動 retry,避免半套用被重跑。 +- follow-up 修正:retry failure branch 必須在 `else` 內保存原始 `rc`,避免 shell `if` compound status 把連續 timeout 誤記為 `rc=0` / success。 **驗證**: - `python3.11 -m pytest ops/runner/test_cd_controlled_runtime_profile.py -q`:`35 passed`。 diff --git a/ops/runner/test_cd_controlled_runtime_profile.py b/ops/runner/test_cd_controlled_runtime_profile.py index 28b732abf..0fe0533e0 100644 --- a/ops/runner/test_cd_controlled_runtime_profile.py +++ b/ops/runner/test_cd_controlled_runtime_profile.py @@ -141,6 +141,7 @@ def test_harbor_110_local_repair_workflow_is_dispatch_only_and_bounded() -> None 'SSH_PROBE_SLEEP_SECONDS="${AWOOOI_110_SSH_PROBE_SLEEP_SECONDS:-10}"' in text ) + assert "else\n rc=$?" in text assert "harbor_110_remote_ssh_probe_attempt=" in text assert "operation_boundary_remote_ssh_bounded=true" in text assert "harbor_110_remote_control_channel_unavailable" in text