fix(recovery): classify ssh key accept timeout
Some checks failed
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Successful in 42s
CD Pipeline / build-and-deploy (push) Failing after 2m37s
CD Pipeline / post-deploy-checks (push) Has been skipped
Some checks failed
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Successful in 42s
CD Pipeline / build-and-deploy (push) Failing after 2m37s
CD Pipeline / post-deploy-checks (push) Has been skipped
This commit is contained in:
@@ -145,6 +145,9 @@ def validate_harbor_registry_controlled_recovery_receipt(
|
||||
"ssh_publickey_offer_timeout_seen": ssh_diagnosis[
|
||||
"publickey_offer_timeout_seen"
|
||||
],
|
||||
"ssh_publickey_server_accepts_key_then_timeout_seen": (
|
||||
ssh_diagnosis["server_accepts_key_then_timeout_seen"]
|
||||
),
|
||||
"ssh_publickey_node_exporter_ok": ssh_diagnosis["node_exporter_ok"],
|
||||
"ssh_publickey_port_tcp_open": ssh_diagnosis["ssh_port_tcp_open"],
|
||||
"ssh_local_repair_receipt_seen": ssh_local["receipt_seen"],
|
||||
@@ -307,6 +310,8 @@ def _ssh_metadata_phase_status(
|
||||
return "blocked_ssh_metadata_repair_receipt_not_ready"
|
||||
if ssh_diagnosis["publickey_offer_timeout_seen"]:
|
||||
return "blocked_waiting_ssh_metadata_repair_receipt_after_publickey_timeout"
|
||||
if ssh_diagnosis["server_accepts_key_then_timeout_seen"]:
|
||||
return "blocked_waiting_110_local_session_path_diagnosis_after_key_accept_timeout"
|
||||
if watchdog_check["receipt_seen"]:
|
||||
return "skipped_not_required"
|
||||
return "blocked_waiting_ssh_metadata_or_harbor_preflight_receipt"
|
||||
@@ -342,6 +347,11 @@ def _parse_ssh_publickey_diagnosis_output(output: str) -> dict[str, Any]:
|
||||
and item["classification"] == "publickey_offer_timeout"
|
||||
for item in auth_attempts
|
||||
)
|
||||
server_accepts_key_then_timeout_seen = any(
|
||||
item["mode"] == "publickey"
|
||||
and item["classification"] == "server_accepts_key_then_timeout"
|
||||
for item in auth_attempts
|
||||
)
|
||||
preauth_timeout_count = sum(
|
||||
1 for item in auth_attempts if item["classification"] == "preauth_timeout"
|
||||
)
|
||||
@@ -362,6 +372,9 @@ def _parse_ssh_publickey_diagnosis_output(output: str) -> dict[str, Any]:
|
||||
"auth_classifications": auth_attempts,
|
||||
"wooo_publickey_classification": wooo_publickey,
|
||||
"publickey_offer_timeout_seen": publickey_offer_timeout_seen,
|
||||
"server_accepts_key_then_timeout_seen": (
|
||||
server_accepts_key_then_timeout_seen
|
||||
),
|
||||
"preauth_timeout_count": preauth_timeout_count,
|
||||
"permission_denied_count": permission_denied_count,
|
||||
"diagnosis_ready": bool(marker_seen and ssh_port_tcp_open and auth_attempts),
|
||||
@@ -774,6 +787,11 @@ def _active_blockers(
|
||||
and not ssh_local["control_channel_metadata_ready"]
|
||||
):
|
||||
blockers.append("ssh_publickey_offer_timeout_on_wooo")
|
||||
if (
|
||||
ssh_diagnosis["server_accepts_key_then_timeout_seen"]
|
||||
and not ssh_local["control_channel_metadata_ready"]
|
||||
):
|
||||
blockers.append("ssh_publickey_server_accepts_key_then_timeout_on_wooo")
|
||||
if ssh_local["receipt_seen"] and not ssh_local["control_channel_metadata_ready"]:
|
||||
blockers.append("ssh_local_repair_receipt_metadata_not_ready")
|
||||
if not watchdog_check["receipt_seen"]:
|
||||
|
||||
@@ -116,14 +116,18 @@ def test_harbor_recovery_receipt_accepts_ssh_publickey_diagnosis() -> None:
|
||||
assert payload["safe_next_step"] == (
|
||||
"run_110_local_ssh_metadata_check_then_harbor_watchdog_check_mode"
|
||||
)
|
||||
assert "ssh_publickey_offer_timeout_on_wooo" in payload["active_blockers"]
|
||||
assert "ssh_publickey_server_accepts_key_then_timeout_on_wooo" in payload[
|
||||
"active_blockers"
|
||||
]
|
||||
diagnosis = payload["readback"]["ssh_publickey_diagnosis"]
|
||||
assert diagnosis["diagnosis_ready"] is True
|
||||
assert diagnosis["node_exporter_ok"] is True
|
||||
assert diagnosis["ssh_port_tcp_open"] is True
|
||||
assert diagnosis["ssh_banner_seen"] is True
|
||||
assert diagnosis["wooo_publickey_classification"] == "publickey_offer_timeout"
|
||||
assert diagnosis["publickey_offer_timeout_seen"] is True
|
||||
assert diagnosis["wooo_publickey_classification"] == (
|
||||
"server_accepts_key_then_timeout"
|
||||
)
|
||||
assert diagnosis["server_accepts_key_then_timeout_seen"] is True
|
||||
assert diagnosis["raw_output_returned"] is False
|
||||
phases = {
|
||||
phase["phase_id"]: phase
|
||||
@@ -132,16 +136,41 @@ def test_harbor_recovery_receipt_accepts_ssh_publickey_diagnosis() -> None:
|
||||
assert phases["diagnose_ssh_publickey"]["status"] == "ready"
|
||||
assert phases["repair_ssh_metadata_if_check_confirms_metadata_drift"][
|
||||
"status"
|
||||
] == "blocked_waiting_ssh_metadata_repair_receipt_after_publickey_timeout"
|
||||
] == "blocked_waiting_110_local_session_path_diagnosis_after_key_accept_timeout"
|
||||
assert payload["rollups"]["ssh_publickey_diagnosis_receipt_seen"] is True
|
||||
assert payload["rollups"]["ssh_publickey_wooo_publickey_classification"] == (
|
||||
"publickey_offer_timeout"
|
||||
"server_accepts_key_then_timeout"
|
||||
)
|
||||
assert payload["input_redaction"]["ssh_publickey_diagnosis_output"][
|
||||
"line_count"
|
||||
] > 0
|
||||
|
||||
|
||||
def test_harbor_recovery_receipt_classifies_publickey_offer_timeout() -> None:
    """Check that a publickey offer timeout is classified and blocked as such.

    The text returned by ``_ssh_publickey_diagnosis_output()`` is rewritten so
    the ``server_accepts_key_then_timeout`` probe line reads as a
    ``publickey_offer_timeout`` instead. The validated receipt must then list
    the offer-timeout blocker, echo that classification in the diagnosis
    readback, and report the matching status on the SSH metadata repair phase.
    """
    key_accept_marker = "rc=124 classification=server_accepts_key_then_timeout"
    offer_timeout_marker = "rc=255 classification=publickey_offer_timeout"
    offer_timeout_output = _ssh_publickey_diagnosis_output().replace(
        key_accept_marker, offer_timeout_marker
    )

    receipt = validate_harbor_registry_controlled_recovery_receipt(
        {"ssh_publickey_diagnosis_output": offer_timeout_output}
    )

    assert "ssh_publickey_offer_timeout_on_wooo" in receipt["active_blockers"]

    readback = receipt["readback"]["ssh_publickey_diagnosis"]
    assert readback["wooo_publickey_classification"] == "publickey_offer_timeout"
    assert readback["publickey_offer_timeout_seen"] is True

    # Index the phase entries by id so the repair phase can be looked up directly.
    phase_by_id = {}
    for entry in receipt["local_console_phase_readback"]["phases"]:
        phase_by_id[entry["phase_id"]] = entry

    repair_phase = phase_by_id[
        "repair_ssh_metadata_if_check_confirms_metadata_drift"
    ]
    assert repair_phase["status"] == (
        "blocked_waiting_ssh_metadata_repair_receipt_after_publickey_timeout"
    )
|
||||
|
||||
|
||||
def test_harbor_recovery_receipt_surfaces_gitea_queue_blockers() -> None:
|
||||
payload = validate_harbor_registry_controlled_recovery_receipt(
|
||||
{
|
||||
@@ -351,12 +380,12 @@ NODE_LOAD1_PER_CPU=0.93
|
||||
NODE_LOAD_CLASSIFIER=load_not_high
|
||||
SSH_PORT=tcp_open
|
||||
SSH_BANNER=SSH-2.0-OpenSSH_8.9p1
|
||||
SSH_AUTH user=wooo mode=publickey rc=255 classification=publickey_offer_timeout
|
||||
SSH_AUTH user=wooo mode=publickey rc=124 classification=server_accepts_key_then_timeout
|
||||
SSH_AUTH user=root mode=publickey rc=255 classification=permission_denied
|
||||
SSH_AUTH user=git mode=publickey rc=255 classification=preauth_timeout
|
||||
SSH_AUTH user=ollama mode=publickey rc=255 classification=preauth_timeout
|
||||
SSH_AUTH user=wooo mode=password_disabled rc=255 classification=permission_denied
|
||||
INTERPRETATION=publickey_offer_timeout_on_wooo_means_check_110_authorized_keys_permissions_pam_or_account_lookup_path
|
||||
INTERPRETATION=server_accepts_key_then_timeout_on_wooo_means_check_110_session_pam_account_or_shell_path
|
||||
"""
|
||||
|
||||
|
||||
|
||||
@@ -40,7 +40,9 @@ run_timeout() {
|
||||
|
||||
classify_log() {
|
||||
local path="$1"
|
||||
if grep -q 'Server accepts key' "$path"; then
|
||||
if grep -q 'Server accepts key' "$path" && grep -Eiq 'timed out|not responding|Timeout' "$path"; then
|
||||
echo "server_accepts_key_then_timeout"
|
||||
elif grep -q 'Server accepts key' "$path"; then
|
||||
echo "server_accepts_key"
|
||||
elif grep -q 'Offering public key' "$path" && grep -Eiq 'timed out|not responding|Timeout' "$path"; then
|
||||
echo "publickey_offer_timeout"
|
||||
@@ -134,4 +136,4 @@ for user in "${USERS[@]}"; do
|
||||
probe_user "$user" "password_disabled"
|
||||
done
|
||||
|
||||
echo "INTERPRETATION=publickey_offer_timeout_on_wooo_means_check_110_authorized_keys_permissions_pam_or_account_lookup_path"
|
||||
echo "INTERPRETATION=server_accepts_key_then_timeout_means_check_110_session_pam_account_or_shell_path;publickey_offer_timeout_means_check_110_authorized_keys_permissions_pam_or_account_lookup_path"
|
||||
|
||||
@@ -113,6 +113,7 @@ def test_110_ssh_publickey_auth_diagnosis_is_bounded_and_read_only() -> None:
|
||||
assert "PasswordAuthentication=no" in text
|
||||
assert "PubkeyAuthentication=no" in text
|
||||
assert "NumberOfPasswordPrompts=0" in text
|
||||
assert "server_accepts_key_then_timeout" in text
|
||||
assert "publickey_offer_timeout" in text
|
||||
assert "NODE_EXPORTER=ok" in text
|
||||
assert "NODE_LOAD1_PER_CPU" in text
|
||||
|
||||
Reference in New Issue
Block a user