fix(reboot): add windows99 no-secret verify collector
Some checks failed
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Failing after 2m3s
CD Pipeline / build-and-deploy (push) Has been skipped
CD Pipeline / post-deploy-checks (push) Has been skipped

This commit is contained in:
Your Name
2026-07-02 15:37:24 +08:00
parent 39cdd6dc5d
commit 85007db90c
7 changed files with 539 additions and 0 deletions

View File

@@ -0,0 +1,273 @@
#!/usr/bin/env bash
#
# Windows99 VMware verify collector.
#
# Probes a fixed set of TCP ports on the target host, tries public-key-only
# BatchMode SSH logins for a bounded number of candidate users and, in
# --collect mode, runs the remote verify command and prints its output.
# All results are reported on stdout as key=value lines.
#
# Environment overrides (each has a built-in default below):
#   WINDOWS99_HOST                   target host
#   WINDOWS99_CONNECT_TIMEOUT        per-port probe timeout, seconds
#   WINDOWS99_SSH_TIMEOUT            SSH connect timeout, seconds
#   WINDOWS99_SSH_PORT               SSH port
#   WINDOWS99_MAX_AUTH_USERS         upper bound on SSH users that are probed
#   WINDOWS99_KNOWN_HOSTS_FILE       known_hosts file handed to ssh
#   WINDOWS99_REMOTE_VERIFY_COMMAND  command executed remotely in --collect
#   WINDOWS99_SSH_USERS              whitespace-separated candidate users
set -u
MODE="check"
TARGET_HOST="${WINDOWS99_HOST:-192.168.0.99}"
CONNECT_TIMEOUT="${WINDOWS99_CONNECT_TIMEOUT:-3}"
SSH_TIMEOUT="${WINDOWS99_SSH_TIMEOUT:-3}"
SSH_PORT="${WINDOWS99_SSH_PORT:-22}"
MAX_AUTH_USERS="${WINDOWS99_MAX_AUTH_USERS:-2}"
# NOTE(review): the default is a fixed, predictable path under /tmp; consider
# a per-user location if this runs on a shared machine.
KNOWN_HOSTS_FILE="${WINDOWS99_KNOWN_HOSTS_FILE:-/tmp/awoooi-windows99-known_hosts}"
REMOTE_VERIFY_COMMAND="${WINDOWS99_REMOTE_VERIFY_COMMAND:-powershell -NoProfile -ExecutionPolicy Bypass -File .\\windows99-vmware-autostart.ps1 -Mode Verify}"
SSH_USERS=(ogt wooo ooo administrator Administrator)

# set_ssh_users_from_list LIST
# Replaces SSH_USERS with the whitespace-separated names found in LIST.
# `read -a` splits on whitespace WITHOUT pathname expansion, so a value such
# as '*' stays literal instead of expanding to the files of the current
# directory (which an unquoted array assignment would do).
# Returns 1 and leaves SSH_USERS untouched when LIST holds no names, so the
# array can never become empty (expanding an empty array aborts older bash
# versions under `set -u`).
set_ssh_users_from_list() {
  local -a names=()
  # read hits end-of-input before its NUL delimiter and reports failure even
  # though the array was filled; that status is intentionally ignored.
  IFS=$' \t\n' read -r -d '' -a names <<< "$1" || true
  if [[ "${#names[@]}" -eq 0 ]]; then
    return 1
  fi
  SSH_USERS=("${names[@]}")
}

if [[ -n "${WINDOWS99_SSH_USERS:-}" ]]; then
  # A whitespace-only override keeps the built-in candidate list.
  set_ssh_users_from_list "${WINDOWS99_SSH_USERS}" || true
fi
# is_positive_int VALUE
# Succeeds only when VALUE is a non-empty string of decimal digits that does
# not start with 0 (so: an integer >= 1 without sign or padding).
is_positive_int() {
  case "$1" in
    '' | 0* | *[!0-9]*)
      return 1
      ;;
  esac
  return 0
}
# Environment-supplied numbers that are not positive integers are replaced by
# the built-in defaults before any option is parsed.
is_positive_int "${CONNECT_TIMEOUT}" || CONNECT_TIMEOUT=3
is_positive_int "${SSH_TIMEOUT}" || SSH_TIMEOUT=3
is_positive_int "${MAX_AUTH_USERS}" || MAX_AUTH_USERS=2
# usage
# Prints the one-line command synopsis to stdout.
usage() {
  printf 'usage: %s %s\n' "$0" \
    "[--check|--collect] [--host HOST] [--users 'u1 u2'] [--timeout SECONDS]"
}
# parse_args ARGS...
# Applies command-line options to the globals MODE, TARGET_HOST, SSH_USERS,
# CONNECT_TIMEOUT and SSH_TIMEOUT.
# Exits 0 after printing usage for --help/-h.
# Exits 64 for an unknown option, and for --host / --users given without a
# usable value (previously these silently produced an empty host or an empty
# user list).
parse_args() {
  local -a requested_users=()
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --check)
        MODE="check"
        ;;
      --collect)
        MODE="collect"
        ;;
      --host)
        if [[ $# -lt 2 || -z "$2" ]]; then
          printf '%s\n' "error=missing_argument:$1" >&2
          usage >&2
          exit 64
        fi
        TARGET_HOST="$2"
        shift
        ;;
      --users)
        requested_users=()
        if [[ $# -ge 2 ]]; then
          # read -a splits on whitespace without pathname expansion; it hits
          # end-of-input before the NUL delimiter, hence the ignored status.
          IFS=$' \t\n' read -r -d '' -a requested_users <<< "$2" || true
        fi
        if [[ "${#requested_users[@]}" -eq 0 ]]; then
          printf '%s\n' "error=missing_argument:$1" >&2
          usage >&2
          exit 64
        fi
        SSH_USERS=("${requested_users[@]}")
        shift
        ;;
      --timeout)
        # A missing value keeps the existing fallback of 5 seconds; values
        # that are not positive integers are normalised after parsing.
        CONNECT_TIMEOUT="${2:-5}"
        SSH_TIMEOUT="${CONNECT_TIMEOUT}"
        if [[ $# -ge 2 ]]; then
          shift
        fi
        ;;
      --help | -h)
        usage
        exit 0
        ;;
      *)
        printf '%s\n' "error=unknown_argument:$1" >&2
        usage >&2
        exit 64
        ;;
    esac
    shift
  done
}
parse_args "$@"
# Re-validate after option parsing: --timeout may have supplied a value that
# is not a positive integer, in which case the defaults are restored.
is_positive_int "${CONNECT_TIMEOUT}" || CONNECT_TIMEOUT=3
is_positive_int "${SSH_TIMEOUT}" || SSH_TIMEOUT=3
is_positive_int "${MAX_AUTH_USERS}" || MAX_AUTH_USERS=2

# Only the two known modes may continue past this point.
case "${MODE}" in
  check | collect) ;;
  *)
    printf '%s\n' "error=invalid_mode:${MODE}" >&2
    exit 64
    ;;
esac
# Pick the first available timeout utility for the port probes: `timeout`
# is preferred, `gtimeout` is the fallback, "none" means neither was found.
PORT_TIMEOUT_WRAPPER="none"
for port_wrapper_candidate in timeout gtimeout; do
  if command -v "${port_wrapper_candidate}" >/dev/null 2>&1; then
    PORT_TIMEOUT_WRAPPER="${port_wrapper_candidate}"
    break
  fi
done
unset port_wrapper_candidate
# port_open PORT
# Succeeds when `nc -z` can connect to TARGET_HOST:PORT within
# CONNECT_TIMEOUT seconds. Fails when nc is not installed.
# When a timeout utility was detected (PORT_TIMEOUT_WRAPPER), nc is run under
# it with a limit one second above nc's own -w value.
# All nc output is discarded; only the exit status is used.
port_open() {
  local probe_port="$1"
  command -v nc >/dev/null 2>&1 || return 1
  case "${PORT_TIMEOUT_WRAPPER}" in
    timeout)
      timeout "$((CONNECT_TIMEOUT + 1))s" \
        nc -z -w "${CONNECT_TIMEOUT}" "${TARGET_HOST}" "${probe_port}" >/dev/null 2>&1
      ;;
    gtimeout)
      gtimeout "$((CONNECT_TIMEOUT + 1))s" \
        nc -z -w "${CONNECT_TIMEOUT}" "${TARGET_HOST}" "${probe_port}" >/dev/null 2>&1
      ;;
    *)
      nc -z -w "${CONNECT_TIMEOUT}" "${TARGET_HOST}" "${probe_port}" >/dev/null 2>&1
      ;;
  esac
}
# bool_for_port PORT
# Prints "1" (no trailing newline) when port_open succeeds for PORT,
# otherwise "0".
bool_for_port() {
  local probe_port="$1"
  local flag="0"
  if port_open "${probe_port}"; then
    flag="1"
  fi
  printf '%s' "${flag}"
}
# join_users
# Prints the entries of SSH_USERS joined with commas (no trailing newline).
join_users() {
  local csv=""
  local name
  for name in "${SSH_USERS[@]}"; do
    # Prefix "<csv>," only once csv already holds something.
    csv="${csv:+${csv},}${name}"
  done
  printf '%s' "${csv}"
}
# Reachability snapshot: one "1"/"0" flag per probed TCP port.
PORT_22_OPEN="$(bool_for_port 22)"
PORT_3389_OPEN="$(bool_for_port 3389)"
PORT_5985_OPEN="$(bool_for_port 5985)"
PORT_5986_OPEN="$(bool_for_port 5986)"
PORT_9182_OPEN="$(bool_for_port 9182)"

# SSH probe bookkeeping; the values below describe "nothing attempted yet"
# and are overwritten by the authentication probe.
SSH_AUTH_ATTEMPTED_USERS="$(join_users)"
SSH_AUTH_PROBED_USERS=0
SSH_BATCHMODE_AUTH_READY=0
SSH_AUTHENTICATED_USER=""
SSH_AUTH_PROBE_EXIT_STATUS="not_attempted"
SSH_AUTH_PROBE_STDOUT_PRESENT=0

# Timeout utility used around ssh: `timeout` first, then `gtimeout`,
# "none" when neither is installed.
SSH_TIMEOUT_WRAPPER="none"
for ssh_wrapper_candidate in timeout gtimeout; do
  if command -v "${ssh_wrapper_candidate}" >/dev/null 2>&1; then
    SSH_TIMEOUT_WRAPPER="${ssh_wrapper_candidate}"
    break
  fi
done
unset ssh_wrapper_candidate
# Options shared by every ssh invocation. Each entry of the list becomes one
# "-o <entry>" pair, in this order; the port is appended last.
# The first group restricts authentication to non-interactive public key,
# the second bounds connection time, the last controls logging and host keys.
SSH_OPTS=()
for ssh_option in \
  BatchMode=yes \
  PreferredAuthentications=publickey \
  PubkeyAuthentication=yes \
  PasswordAuthentication=no \
  KbdInteractiveAuthentication=no \
  NumberOfPasswordPrompts=0 \
  "ConnectTimeout=${SSH_TIMEOUT}" \
  ConnectionAttempts=1 \
  GSSAPIAuthentication=no \
  LogLevel=ERROR \
  StrictHostKeyChecking=no \
  "UserKnownHostsFile=${KNOWN_HOSTS_FILE}"; do
  SSH_OPTS+=(-o "${ssh_option}")
done
unset ssh_option
SSH_OPTS+=(-p "${SSH_PORT}")
# run_ssh USER [COMMAND...]
# Runs ssh with SSH_OPTS against USER@TARGET_HOST, passing any remaining
# arguments through as the remote command. When a timeout utility was
# detected (SSH_TIMEOUT_WRAPPER), ssh runs under it with a limit one second
# above SSH_TIMEOUT. The exit status is that of the wrapper/ssh invocation.
run_ssh() {
  local login_user="$1"
  shift
  case "${SSH_TIMEOUT_WRAPPER}" in
    timeout)
      timeout "$((SSH_TIMEOUT + 1))s" \
        ssh "${SSH_OPTS[@]}" "${login_user}@${TARGET_HOST}" "$@"
      ;;
    gtimeout)
      gtimeout "$((SSH_TIMEOUT + 1))s" \
        ssh "${SSH_OPTS[@]}" "${login_user}@${TARGET_HOST}" "$@"
      ;;
    *)
      ssh "${SSH_OPTS[@]}" "${login_user}@${TARGET_HOST}" "$@"
      ;;
  esac
}
# Authentication probe: only when port 22 answered. Candidate users are tried
# in order until one login succeeds or MAX_AUTH_USERS have been probed.
# SSH_AUTH_PROBE_EXIT_STATUS always holds the status of the latest attempt.
if [[ "${PORT_22_OPEN}" == "1" ]]; then
  for user in "${SSH_USERS[@]}"; do
    (( SSH_AUTH_PROBED_USERS < MAX_AUTH_USERS )) || break
    SSH_AUTH_PROBED_USERS=$((SSH_AUTH_PROBED_USERS + 1))
    # stderr is merged in, so auth_output holds everything ssh printed.
    auth_output="$(run_ssh "${user}" "echo AWOOOI_WINDOWS99_SSH_READY" 2>&1)"
    SSH_AUTH_PROBE_EXIT_STATUS=$?
    if [[ "${SSH_AUTH_PROBE_EXIT_STATUS}" -ne 0 ]]; then
      continue
    fi
    SSH_BATCHMODE_AUTH_READY=1
    SSH_AUTHENTICATED_USER="${user}"
    if [[ -n "${auth_output}" ]]; then
      SSH_AUTH_PROBE_STDOUT_PRESENT=1
    fi
    break
  done
fi
# Decide the collection outcome. The defaults describe the
# "SSH reachable but no public-key login worked" case; the branches below
# only override what differs from it.
DRY_RUN="true"
REMOTE_VERIFY_ATTEMPTED=0
REMOTE_VERIFY_EXIT_STATUS="not_attempted"
REMOTE_VERIFY_OUTPUT=""
PROCESS_EXIT_STATUS=0
VERIFY_COLLECTION_STATUS="blocked_ssh_publickey_auth_missing"
SAFE_NEXT_STEP="select_existing_authorized_public_key_user_or_set_WINDOWS99_SSH_USERS_then_rerun_collector_no_password"

if [[ "${PORT_22_OPEN}" != "1" || "${SSH_BATCHMODE_AUTH_READY}" != "1" ]]; then
  # Blocked: either the port is closed or no user authenticated.
  if [[ "${PORT_22_OPEN}" != "1" ]]; then
    VERIFY_COLLECTION_STATUS="blocked_ssh_port_closed"
    SAFE_NEXT_STEP="enable_existing_ssh_management_channel_publickey_only_then_rerun_collector_no_secret"
  fi
  # A blocked run is only reported as a failure (75) in collect mode.
  if [[ "${MODE}" == "collect" ]]; then
    PROCESS_EXIT_STATUS=75
  fi
elif [[ "${MODE}" == "check" ]]; then
  # Check mode stops after the successful auth probe.
  VERIFY_COLLECTION_STATUS="ready_ssh_batchmode_auth_probe_only"
  SAFE_NEXT_STEP="rerun_collector_with_collect_then_commit_no_secret_verify_artifact_and_scorecard_rerun"
else
  # Collect mode: run the remote verify command as the authenticated user,
  # capturing stdout and stderr together.
  DRY_RUN="false"
  REMOTE_VERIFY_ATTEMPTED=1
  REMOTE_VERIFY_OUTPUT="$(run_ssh "${SSH_AUTHENTICATED_USER}" "${REMOTE_VERIFY_COMMAND}" 2>&1)"
  REMOTE_VERIFY_EXIT_STATUS=$?
  if [[ "${REMOTE_VERIFY_EXIT_STATUS}" -eq 0 ]]; then
    VERIFY_COLLECTION_STATUS="collected_windows99_vmware_verify_stdout"
    SAFE_NEXT_STEP="commit_no_secret_verify_artifact_then_rerun_reboot_auto_recovery_slo_scorecard"
  else
    VERIFY_COLLECTION_STATUS="blocked_remote_verify_command_failed"
    SAFE_NEXT_STEP="inspect_no_secret_verify_stdout_then_fix_verify_script_or_path_and_rerun_collector"
    PROCESS_EXIT_STATUS=75
  fi
fi
# emit_field KEY VALUE
# Prints one "KEY=VALUE" report line to stdout.
emit_field() {
  printf '%s=%s\n' "$1" "$2"
}

# Report: configuration, probe results and outcome as key=value lines.
emit_field schema_version "windows99_vmware_verify_collector_v1"
emit_field dry_run "${DRY_RUN}"
emit_field target_host "${TARGET_HOST}"
emit_field target_host_alias "99"
emit_field connect_timeout_seconds "${CONNECT_TIMEOUT}"
emit_field ssh_timeout_seconds "${SSH_TIMEOUT}"
emit_field port_timeout_wrapper "${PORT_TIMEOUT_WRAPPER}"
emit_field ssh_auth_probe_user_limit "${MAX_AUTH_USERS}"
emit_field ssh_timeout_wrapper "${SSH_TIMEOUT_WRAPPER}"
emit_field port_22_open "${PORT_22_OPEN}"
emit_field port_3389_open "${PORT_3389_OPEN}"
emit_field port_5985_open "${PORT_5985_OPEN}"
emit_field port_5986_open "${PORT_5986_OPEN}"
emit_field port_9182_open "${PORT_9182_OPEN}"
emit_field ssh_candidate_users "${SSH_AUTH_ATTEMPTED_USERS}"
emit_field ssh_auth_probed_users "${SSH_AUTH_PROBED_USERS}"
emit_field ssh_batchmode_auth_ready "${SSH_BATCHMODE_AUTH_READY}"
emit_field ssh_authenticated_user "${SSH_AUTHENTICATED_USER}"
emit_field ssh_auth_probe_exit_status "${SSH_AUTH_PROBE_EXIT_STATUS}"
emit_field ssh_auth_probe_stdout_present "${SSH_AUTH_PROBE_STDOUT_PRESENT}"
emit_field remote_verify_attempted "${REMOTE_VERIFY_ATTEMPTED}"
emit_field remote_verify_exit_status "${REMOTE_VERIFY_EXIT_STATUS}"
emit_field verify_collection_status "${VERIFY_COLLECTION_STATUS}"
emit_field safe_next_step "${SAFE_NEXT_STEP}"

# Fixed attestations: this script always reports these as false.
emit_field secret_value_read "false"
emit_field password_prompt_allowed "false"
emit_field remote_write_performed "false"
emit_field host_reboot_performed "false"
emit_field vm_power_change_performed "false"
emit_field windows_update_policy_apply_performed "false"

# The captured remote output is appended between two marker lines, and only
# when the remote verify command was actually run.
if [[ "${REMOTE_VERIFY_ATTEMPTED}" == "1" ]]; then
  printf '%s\n' \
    "remote_verify_output_begin" \
    "${REMOTE_VERIFY_OUTPUT}" \
    "remote_verify_output_end"
fi
exit "${PROCESS_EXIT_STATUS}"

View File

@@ -842,6 +842,12 @@ def build_windows99_verify_collection_packet(
"powershell -ExecutionPolicy Bypass -File "
".\\windows99-vmware-autostart.ps1 -Mode Verify"
),
"no_secret_collector_check_command": (
"bash scripts/reboot-recovery/collect-windows99-vmware-verify.sh --check"
),
"no_secret_collector_collect_command": (
"bash scripts/reboot-recovery/collect-windows99-vmware-verify.sh --collect"
),
"post_verifier": (
"rerun_reboot_auto_recovery_slo_scorecard_with_"
"windows99_vmware_file_no_secret_no_reboot"
@@ -850,6 +856,7 @@ def build_windows99_verify_collection_packet(
"safe_collection_channels": [
"authorized_windows99_console_verify_stdout_only",
"existing_management_channel_verify_mode_only",
"no_secret_ssh_batchmode_verify_collector",
"committed_no_secret_artifact_file_then_scorecard_rerun",
],
"forbidden_actions": [

View File

@@ -368,6 +368,15 @@ def test_missing_windows99_vmware_readback_fails_closed(tmp_path: Path) -> None:
assert "-Mode Verify" in payload["windows99_verify_collection"][
"no_secret_verify_command"
]
assert payload["windows99_verify_collection"][
"no_secret_collector_check_command"
] == "bash scripts/reboot-recovery/collect-windows99-vmware-verify.sh --check"
assert payload["windows99_verify_collection"][
"no_secret_collector_collect_command"
] == "bash scripts/reboot-recovery/collect-windows99-vmware-verify.sh --collect"
assert "no_secret_ssh_batchmode_verify_collector" in payload[
"windows99_verify_collection"
]["safe_collection_channels"]
assert "windows_password_or_secret_collection" in payload[
"windows99_verify_collection"
]["forbidden_actions"]

View File

@@ -0,0 +1,217 @@
from __future__ import annotations
import os
import subprocess
import textwrap
from pathlib import Path
ROOT = Path(__file__).resolve().parents[3]
SCRIPT = ROOT / "scripts" / "reboot-recovery" / "collect-windows99-vmware-verify.sh"
def _write_executable(path: Path, text: str) -> None:
path.write_text(textwrap.dedent(text).lstrip())
path.chmod(0o755)
def _run_collector(fake_bin: Path, *args: str) -> subprocess.CompletedProcess[str]:
    """Run the collector script with ``fake_bin`` first on ``PATH``.

    Args:
        fake_bin: Directory holding the stub executables (e.g. ``nc``,
            ``ssh``) that must shadow the real tools.
        *args: Command-line arguments passed to the collector.

    Returns:
        The completed process with stdout/stderr captured as text; a
        non-zero exit status does not raise.

    Inherited ``WINDOWS99_*`` variables are dropped so that settings from the
    caller's shell (host, user list, probe limit, timeouts) cannot change the
    values the tests assert on. The known_hosts file is redirected into
    ``fake_bin``.
    """
    env = {
        name: value
        for name, value in os.environ.items()
        if not name.startswith("WINDOWS99_")
    }
    inherited_path = env.get("PATH", "")
    # Avoid a trailing separator: an empty PATH entry means "current directory".
    env["PATH"] = (
        f"{fake_bin}{os.pathsep}{inherited_path}" if inherited_path else str(fake_bin)
    )
    env["WINDOWS99_KNOWN_HOSTS_FILE"] = str(fake_bin / "known_hosts")
    return subprocess.run(
        ["bash", str(SCRIPT), *args],
        cwd=ROOT,
        env=env,
        capture_output=True,
        text=True,
        check=False,
    )
def _key_values(stdout: str) -> dict[str, str]:
values: dict[str, str] = {}
for line in stdout.splitlines():
if "=" not in line:
continue
key, value = line.split("=", 1)
values[key] = value
return values
def test_collector_contract_forbids_secret_and_runtime_actions() -> None:
    """The script text must contain the key-only SSH options and none of the forbidden tokens."""
    script_source = SCRIPT.read_text()

    required_options = (
        "BatchMode=yes",
        "PreferredAuthentications=publickey",
        "PubkeyAuthentication=yes",
        "PasswordAuthentication=no",
        "KbdInteractiveAuthentication=no",
        "NumberOfPasswordPrompts=0",
        "ConnectionAttempts=1",
        "GSSAPIAuthentication=no",
    )
    for option in required_options:
        assert option in script_source

    forbidden_tokens = (
        "sshpass",
        "PasswordAuthentication=yes",
        "KbdInteractiveAuthentication=yes",
        "net use",
        "shutdown",
        "Restart-Computer",
        "Start-VM",
        "vmrun start",
        "Set-ItemProperty",
        "Register-ScheduledTask",
    )
    for token in forbidden_tokens:
        assert token not in script_source
def test_check_mode_reports_open_ports_and_missing_publickey_auth(tmp_path: Path) -> None:
    """--check with ports 22/3389 open and every SSH login denied reports a blocked dry run."""
    stub_dir = tmp_path / "bin"
    stub_dir.mkdir()
    # Stub nc: succeeds only when its last argument (the port) is 22 or 3389.
    _write_executable(
        stub_dir / "nc",
        """
        #!/usr/bin/env bash
        port="${!#}"
        if [[ "$port" == "22" || "$port" == "3389" ]]; then
          exit 0
        fi
        exit 1
        """,
    )
    # Stub ssh: always fails with status 255 and a denial message on stderr.
    _write_executable(
        stub_dir / "ssh",
        """
        #!/usr/bin/env bash
        printf '%s\n' 'Permission denied (publickey,password,keyboard-interactive).' >&2
        exit 255
        """,
    )

    proc = _run_collector(stub_dir, "--check")

    assert proc.returncode == 0
    fields = _key_values(proc.stdout)
    expected_fields = {
        "schema_version": "windows99_vmware_verify_collector_v1",
        "dry_run": "true",
        "ssh_auth_probe_user_limit": "2",
        "port_22_open": "1",
        "port_3389_open": "1",
        "port_5985_open": "0",
        "port_5986_open": "0",
        "ssh_batchmode_auth_ready": "0",
        "ssh_auth_probed_users": "2",
        "remote_verify_attempted": "0",
        "verify_collection_status": "blocked_ssh_publickey_auth_missing",
        "secret_value_read": "false",
        "password_prompt_allowed": "false",
        "remote_write_performed": "false",
        "host_reboot_performed": "false",
        "vm_power_change_performed": "false",
        "windows_update_policy_apply_performed": "false",
    }
    for key, wanted in expected_fields.items():
        assert fields[key] == wanted
def test_collect_mode_blocks_without_publickey_auth(tmp_path: Path) -> None:
    """--collect exits 75 and attempts no remote verify when no SSH login succeeds."""
    stub_dir = tmp_path / "bin"
    stub_dir.mkdir()
    # Stub nc: only port 22 is reported open.
    _write_executable(
        stub_dir / "nc",
        """
        #!/usr/bin/env bash
        port="${!#}"
        [[ "$port" == "22" ]]
        """,
    )
    # Stub ssh: every invocation fails with status 255.
    _write_executable(
        stub_dir / "ssh",
        """
        #!/usr/bin/env bash
        exit 255
        """,
    )

    proc = _run_collector(stub_dir, "--collect")

    assert proc.returncode == 75
    fields = _key_values(proc.stdout)
    expected_fields = {
        "dry_run": "true",
        "ssh_batchmode_auth_ready": "0",
        "remote_verify_attempted": "0",
        "verify_collection_status": "blocked_ssh_publickey_auth_missing",
    }
    for key, wanted in expected_fields.items():
        assert fields[key] == wanted
def test_check_mode_auth_ready_does_not_run_remote_verify(tmp_path: Path) -> None:
    """--check with a working SSH login reports readiness without running the remote verify."""
    stub_dir = tmp_path / "bin"
    stub_dir.mkdir()
    # Stub nc: every port is reported open.
    _write_executable(
        stub_dir / "nc",
        """
        #!/usr/bin/env bash
        exit 0
        """,
    )
    # Stub ssh: fails with 44 if asked to run powershell, otherwise answers
    # the auth probe successfully.
    _write_executable(
        stub_dir / "ssh",
        """
        #!/usr/bin/env bash
        args="$*"
        if [[ "$args" == *"powershell"* ]]; then
          printf '%s\n' 'unexpected remote verify' >&2
          exit 44
        fi
        printf '%s\n' 'AWOOOI_WINDOWS99_SSH_READY'
        exit 0
        """,
    )

    proc = _run_collector(stub_dir, "--check")

    assert proc.returncode == 0
    fields = _key_values(proc.stdout)
    expected_fields = {
        "ssh_batchmode_auth_ready": "1",
        "remote_verify_attempted": "0",
        "verify_collection_status": "ready_ssh_batchmode_auth_probe_only",
    }
    for key, wanted in expected_fields.items():
        assert fields[key] == wanted
    for absent_marker in ("VMRUN_PRESENT=1", "remote_verify_output_begin"):
        assert absent_marker not in proc.stdout
def test_collect_mode_runs_readonly_remote_verify_when_auth_ready(tmp_path: Path) -> None:
    """--collect with a working SSH login runs the remote verify and embeds its output."""
    stub_dir = tmp_path / "bin"
    stub_dir.mkdir()
    # Stub nc: every port is reported open.
    _write_executable(
        stub_dir / "nc",
        """
        #!/usr/bin/env bash
        exit 0
        """,
    )
    # Stub ssh: prints canned verify lines when asked to run powershell,
    # otherwise answers the auth probe successfully.
    _write_executable(
        stub_dir / "ssh",
        """
        #!/usr/bin/env bash
        args="$*"
        if [[ "$args" == *"powershell"* ]]; then
          printf '%s\n' 'AWOOOI_WINDOWS99_VMWARE_AUTOSTART=1'
          printf '%s\n' 'MODE=Verify'
          printf '%s\n' 'VMRUN_PRESENT=1'
          printf '%s\n' 'VMWARE_AUTOSTART_VERIFY_READY=1'
          exit 0
        fi
        printf '%s\n' 'AWOOOI_WINDOWS99_SSH_READY'
        exit 0
        """,
    )

    proc = _run_collector(stub_dir, "--collect")

    assert proc.returncode == 0
    fields = _key_values(proc.stdout)
    expected_fields = {
        "dry_run": "false",
        "ssh_batchmode_auth_ready": "1",
        "remote_verify_attempted": "1",
        "remote_verify_exit_status": "0",
        "verify_collection_status": "collected_windows99_vmware_verify_stdout",
    }
    for key, wanted in expected_fields.items():
        assert fields[key] == wanted
    expected_markers = (
        "remote_verify_output_begin",
        "VMRUN_PRESENT=1",
        "VMWARE_AUTOSTART_VERIFY_READY=1",
        "remote_verify_output_end",
    )
    for marker in expected_markers:
        assert marker in proc.stdout