#!/usr/bin/env python3
"""
IwoooS Docker / systemd / host service post-incident readback 只讀計畫產生器。

本工具讀取 host service change evidence acceptance snapshot,建立事故後回讀
計畫:誰動了 Docker / systemd / compose / repair-bot、何時動、改前改後狀態、
哪些 public / admin route、AI provider、monitoring 與產品受影響、如何恢復、
如何防再發。它不 SSH、不讀 live host、不執行 docker / systemctl、不呼叫
repair-bot、不跑 Ansible、不做 route smoke、不保存 raw log / raw config,
也不把「服務變綠」誤判成 runtime authorization。
"""
# NOTE: the module docstring above is passed to argparse as the ``--help``
# description (see ``main``), so it is runtime text and is kept verbatim.
#
# English summary: read-only generator for the IwoooS Docker / systemd / host
# service post-incident readback plan.  It reads the host service change
# evidence acceptance snapshot (a JSON file) and writes a plan snapshot in
# which every acceptance flag is False, every accepted count is 0 and every
# readback reference is still empty.  The only external command this module
# runs is ``git rev-parse`` to stamp the report with the current commit.

from __future__ import annotations

import argparse
import json
import subprocess
import sys
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Any


# Fixed UTC+8 offset used for the default ``generated_at`` timestamp.
TAIPEI = timezone(timedelta(hours=8))

# Every field name advertised on a readback candidate.
READBACK_FIELDS = [
    "readback_candidate_id",
    "source_change_evidence_candidate_id",
    "surface_id",
    "label",
    "expected_host_scope",
    "config_kind",
    "service_scope",
    "control_tier",
    "write_capable_surface",
    "requires_live_evidence",
    "change_or_incident_ref",
    "actor_attribution_ref",
    "boot_time_ref",
    "restart_or_recovery_window_ref",
    "before_service_state_ref",
    "after_service_state_ref",
    "docker_daemon_state_ref",
    "compose_stack_state_ref",
    "systemd_unit_state_ref",
    "failed_unit_review_ref",
    "port_binding_state_ref",
    "dependency_impact_ref",
    "public_route_recovery_ref",
    "admin_route_recovery_ref",
    "agent_provider_health_ref",
    "monitoring_alert_ref",
    "operator_notification_ref",
    "cross_project_sync_ref",
    "restoration_evidence_ref",
    "postcheck_readback_ref",
    "recurrence_guard_ref",
    "maintenance_window",
    "rollback_owner",
    "reviewer_outcome",
    "followup_owner",
    "not_approval",
]

# Field names listed as required in a submitted readback package.
REQUIRED_READBACK_FIELDS = [
    "change_or_incident_ref",
    "actor_attribution_ref",
    "boot_time_ref",
    "restart_or_recovery_window_ref",
    "before_service_state_ref",
    "after_service_state_ref",
    "docker_daemon_state_ref",
    "compose_stack_state_ref",
    "systemd_unit_state_ref",
    "failed_unit_review_ref",
    "port_binding_state_ref",
    "dependency_impact_ref",
    "public_route_recovery_ref",
    "admin_route_recovery_ref",
    "agent_provider_health_ref",
    "monitoring_alert_ref",
    "operator_notification_ref",
    "cross_project_sync_ref",
    "restoration_evidence_ref",
    "postcheck_readback_ref",
    "recurrence_guard_ref",
    "maintenance_window",
    "rollback_owner",
    "followup_owner",
    "redacted_evidence_refs",
    "no_secret_value_attestation",
    "no_raw_log_or_config_attestation",
    "no_false_green_attestation",
]

# Reviewer checklist: each entry pairs a stable ``check_id`` with the
# human-readable instruction that is emitted verbatim into the report.
REVIEWER_CHECKS = [
    {
        "check_id": "source_change_evidence_current",
        "instruction": "來源 host service change evidence snapshot 必須是目前版本。",
    },
    {
        "check_id": "incident_ref_present",
        "instruction": "必須有可追溯 change / incident ref。",
    },
    {
        "check_id": "actor_not_anonymous",
        "instruction": "必須標示 actor role / team,不接受匿名 restart、kill、start、compose 或 daemon 操作。",
    },
    {
        "check_id": "boot_or_recovery_window_present",
        "instruction": "boot time、restart window 或 recovery window 必須有脫敏 ref。",
    },
    {
        "check_id": "before_after_service_state_present",
        "instruction": "必須有 before / after service state ref,不能只寫服務已恢復。",
    },
    {
        "check_id": "docker_daemon_state_present",
        "instruction": "Docker daemon active、starting、failed、socket、contention 或 API reachable 狀態必須有摘要 ref。",
    },
    {
        "check_id": "compose_stack_state_present",
        "instruction": "Compose stack / container state 只能收脫敏狀態摘要 ref,不保存 raw docker ps dump。",
    },
    {
        "check_id": "systemd_unit_state_present",
        "instruction": "systemd failed unit、restart policy 或 degraded state 必須有摘要 ref。",
    },
    {
        "check_id": "failed_unit_review_present",
        "instruction": "必須說明 failed unit 是否與事故、restart 或服務恢復相關。",
    },
    {
        "check_id": "port_binding_state_present",
        "instruction": "必須確認 host port、container port、proxy、gateway 與 firewall 狀態是否一致。",
    },
    {
        "check_id": "dependency_impact_present",
        "instruction": "必須列出上游、下游、DB、queue、registry、AI provider、public route 與 monitoring 影響。",
    },
    {
        "check_id": "public_route_recovery_present",
        "instruction": "public route 受影響時需有恢復 ref;無影響也需明確不適用。",
    },
    {
        "check_id": "admin_route_recovery_present",
        "instruction": "admin / internal operator route 受影響時需有恢復 ref;無影響也需明確不適用。",
    },
    {
        "check_id": "agent_provider_health_present",
        "instruction": "Ollama、AI provider、agent route 或 webhook 受影響時需有健康 readback ref。",
    },
    {
        "check_id": "monitoring_alert_ref_present",
        "instruction": "需列 monitoring / alert / dashboard / incident ref,不能只靠人工觀察。",
    },
    {
        "check_id": "operator_notification_present",
        "instruction": "需提供已通知受影響產品、owner 或 Session 的脫敏 ref。",
    },
    {
        "check_id": "cross_project_sync_present",
        "instruction": "若影響 AwoooP、IwoooS、agent-bounty、StockPlatform、公開網站或監控,需有跨專案同步 ref。",
    },
    {
        "check_id": "restoration_evidence_present",
        "instruction": "已恢復事故需提供恢復時間與恢復證據;未恢復需提供 still-degraded ref。",
    },
    {
        "check_id": "postcheck_independent",
        "instruction": "post-check 必須獨立於原操作人與 UI 卡片。",
    },
    {
        "check_id": "recurrence_guard_present",
        "instruction": "必須提出防再發 guard、change freeze、owner review 或自動化阻擋。",
    },
    {
        "check_id": "runner_repair_bot_contention_present",
        "instruction": "必須確認 runner、repair-bot、backup job、iptables / xtables 或 compose action 是否競爭。",
    },
    {
        "check_id": "maintenance_window_present",
        "instruction": "後續任何 restart / repair / compose / systemd 操作都需維護窗口。",
    },
    {
        "check_id": "rollback_owner_present",
        "instruction": "rollback owner 與 rollback plan 必須同時存在。",
    },
    {
        "check_id": "no_false_green_route_or_container",
        "instruction": "不得只用 route 200、container up、Docker API 回應、dashboard up 或 service healthy 當成事故已驗收。",
    },
    {
        "check_id": "raw_log_config_absent",
        "instruction": "不得保存 raw docker logs、raw journal、raw compose、raw systemd unit、env dump 或未脫敏 host config。",
    },
    {
        "check_id": "secret_or_key_value_absent",
        "instruction": "不得包含 secret、SSH key、token、cookie、private key、hash 或 partial secret。",
    },
    {
        "check_id": "counts_transition_safe",
        "instruction": "只有 reviewer record 能更新 accepted count,且不得同時開 runtime gate。",
    },
    {
        "check_id": "runtime_stays_zero",
        "instruction": "readback plan 不得觸發任何 SSH、Docker、systemctl、repair-bot、Ansible、route smoke 或 production write。",
    },
]

# Outcome lanes: each entry pairs a stable ``lane_id`` with its meaning text,
# emitted verbatim into the report.
OUTCOME_LANES = [
    {
        "lane_id": "waiting_post_incident_readback",
        "meaning": "尚未收到主機服務事故回讀包;所有 accepted / runtime count 維持 0。",
    },
    {
        "lane_id": "request_actor_supplement",
        "meaning": "缺 actor / owner / decision 時要求補件。",
    },
    {
        "lane_id": "request_before_after_supplement",
        "meaning": "缺 before / after、boot time、restart window 或 restoration evidence 時要求補件。",
    },
    {
        "lane_id": "request_service_state_supplement",
        "meaning": "缺 Docker daemon、compose、systemd、failed unit、port binding 或 dependency 狀態時要求補件。",
    },
    {
        "lane_id": "request_impact_supplement",
        "meaning": "缺 public/admin route、AI provider、monitoring、operator notification 或 cross-project sync 時要求補件。",
    },
    {
        "lane_id": "quarantine_raw_payload",
        "meaning": "收到 secret、env dump、raw log、raw journal、raw compose 或未脫敏 host config 時只能隔離。",
    },
    {
        "lane_id": "reject_unattributed_restart",
        "meaning": "無 actor、無 affected scope、無 rollback 或無 notification 的 restart / kill / compose action 不得驗收。",
    },
    {
        "lane_id": "ready_for_host_service_post_incident_review",
        "meaning": "metadata 合格後,只能進 reviewer review。",
    },
    {
        "lane_id": "recurrence_guard_backfill_required",
        "meaning": "需補防再發 guard、owner review、change freeze 或 automation block。",
    },
    {
        "lane_id": "waiting_runtime_gate",
        "meaning": "即使 readback accepted,runtime gate 仍需獨立人工批准。",
    },
]

# Action identifiers listed as blocked in every candidate and in the report.
BLOCKED_ACTIONS = [
    "ssh_read",
    "ssh_write",
    "live_host_read",
    "docker_ps_live_read",
    "docker_restart",
    "docker_kill",
    "docker_start",
    "docker_compose_up",
    "docker_compose_down",
    "docker_compose_pull",
    "systemctl_restart",
    "systemctl_reload",
    "systemctl_kill",
    "systemctl_start",
    "repair_bot_execute",
    "ansible_apply",
    "sudo_action",
    "host_file_write",
    "firewall_change",
    "port_change",
    "route_smoke",
    "public_gateway_reload",
    "nginx_reload",
    "active_scan",
    "secret_value_collection",
    "raw_live_config_storage",
    "raw_docker_log_storage",
    "raw_journal_storage",
    "raw_env_dump_storage",
    "accept_restart_without_actor",
    "accept_recovery_without_before_after",
    "accept_service_healthy_as_config_accepted",
    "accept_route_200_as_all_green",
    "accept_container_up_as_all_green",
    "skip_dependency_map_review",
    "skip_port_binding_review",
    "hide_daemon_runner_contention",
    "mark_readback_accepted_without_reviewer_record",
    "open_runtime_gate",
    "add_action_button",
    "production_write",
]


def git_short_sha(root: Path) -> str:
    """Return the abbreviated HEAD commit of the repository at *root*.

    This is best-effort: when ``git`` cannot be started, exits non-zero,
    or prints nothing, the string ``"unknown"`` is returned instead of
    raising.
    """
    try:
        result = subprocess.run(
            ["git", "rev-parse", "--short", "HEAD"],
            cwd=root,
            check=True,
            capture_output=True,
            text=True,
        )
    except (OSError, subprocess.SubprocessError, ValueError):
        # OSError: git missing or cwd unusable; SubprocessError: non-zero
        # exit (check=True); ValueError: invalid arguments or undecodable
        # output.  Anything else is a programming error and should surface.
        return "unknown"
    return result.stdout.strip() or "unknown"


def load_json(path: Path) -> dict[str, Any]:
    """Read *path* as UTF-8 text and return the parsed JSON document.

    Raises:
        OSError: if the file cannot be read.
        json.JSONDecodeError: if the content is not valid JSON.
    """
    return json.loads(path.read_text(encoding="utf-8"))


def build_candidate(source: dict[str, Any]) -> dict[str, Any]:
    """Build one readback candidate from a change-evidence candidate.

    Identification fields are copied from *source*.  Every readback
    reference starts as ``None``, owner/window fields start as a pending
    marker, and every acceptance / authorization flag starts as ``False``.

    The list-valued entries are fresh copies of the module-level constants,
    so mutating a returned candidate never alters the constants or any
    other candidate.

    Raises:
        KeyError: if *source* lacks one of the copied keys.
    """
    surface_id = source["surface_id"]
    return {
        "readback_candidate_id": f"host_service_post_incident_readback:{surface_id}",
        "status": "waiting_post_incident_readback",
        "source_change_evidence_candidate_id": source["change_evidence_candidate_id"],
        "surface_id": surface_id,
        "label": source["label"],
        "expected_host_scope": source["expected_host_scope"],
        "config_kind": source["config_kind"],
        "service_scope": source["service_scope"],
        "control_tier": source["control_tier"],
        "write_capable_surface": source["write_capable_surface"],
        "requires_live_evidence": source["requires_live_evidence"],
        # Readback references: nothing has been received yet.
        "change_or_incident_ref": None,
        "actor_attribution_ref": None,
        "boot_time_ref": None,
        "restart_or_recovery_window_ref": None,
        "before_service_state_ref": None,
        "after_service_state_ref": None,
        "docker_daemon_state_ref": None,
        "compose_stack_state_ref": None,
        "systemd_unit_state_ref": None,
        "failed_unit_review_ref": None,
        "port_binding_state_ref": None,
        "dependency_impact_ref": None,
        "public_route_recovery_ref": None,
        "admin_route_recovery_ref": None,
        "agent_provider_health_ref": None,
        "monitoring_alert_ref": None,
        "operator_notification_ref": None,
        "cross_project_sync_ref": None,
        "restoration_evidence_ref": None,
        "postcheck_readback_ref": None,
        "recurrence_guard_ref": None,
        "maintenance_window": "pending_post_incident_readback",
        "rollback_owner": "pending_post_incident_readback",
        "reviewer_outcome": "waiting_post_incident_readback",
        "followup_owner": "pending_post_incident_readback",
        # Copies, not aliases, of the shared module-level lists.
        "readback_fields": list(READBACK_FIELDS),
        "required_readback_fields": list(REQUIRED_READBACK_FIELDS),
        "reviewer_checks": [item["check_id"] for item in REVIEWER_CHECKS],
        "outcome_lanes": [item["lane_id"] for item in OUTCOME_LANES],
        "blocked_actions": list(BLOCKED_ACTIONS),
        "not_approval": True,
        # Acceptance flags: all False in a freshly generated plan.
        "post_incident_readback_received": False,
        "post_incident_readback_accepted": False,
        "actor_attribution_accepted": False,
        "before_after_state_accepted": False,
        "docker_daemon_state_accepted": False,
        "compose_stack_state_accepted": False,
        "systemd_unit_state_accepted": False,
        "failed_unit_review_accepted": False,
        "port_binding_state_accepted": False,
        "dependency_impact_accepted": False,
        "public_route_recovery_accepted": False,
        "admin_route_recovery_accepted": False,
        "agent_provider_health_accepted": False,
        "monitoring_alert_accepted": False,
        "operator_notification_accepted": False,
        "cross_project_sync_accepted": False,
        "restoration_evidence_accepted": False,
        "postcheck_readback_accepted": False,
        "recurrence_guard_accepted": False,
        "maintenance_window_accepted": False,
        "rollback_owner_accepted": False,
        "no_false_green_accepted": False,
        # Authorization flags: all False in a freshly generated plan.
        "ssh_read_authorized": False,
        "ssh_write_authorized": False,
        "live_host_read_authorized": False,
        "docker_action_authorized": False,
        "systemctl_action_authorized": False,
        "repair_bot_execution_authorized": False,
        "ansible_apply_authorized": False,
        "route_smoke_authorized": False,
        "secret_value_collection_allowed": False,
        "active_scan_authorized": False,
        "runtime_gate": False,
        "action_buttons_allowed": False,
        "production_write_authorized": False,
    }


def build_report(
    root: Path,
    source_report: dict[str, Any],
    generated_at: str | None,
) -> dict[str, Any]:
    """Assemble the full readback plan report.

    Args:
        root: Repository root; used only to look up the git commit.
        source_report: Parsed change-evidence acceptance snapshot.  Its
            ``change_evidence_candidates`` entry (missing or ``null`` is
            treated as empty) yields one readback candidate per item.
        generated_at: Timestamp string to record; when falsy, the current
            time in the ``TAIPEI`` offset is used at second precision.

    Returns:
        A JSON-serialisable dict with summary counters, the reviewer
        checklist, outcome lanes, blocked actions, the candidates and the
        ``boundaries`` flags.

    Raises:
        KeyError: if a source candidate lacks a key ``build_candidate`` copies.
    """
    report_time = generated_at or datetime.now(TAIPEI).isoformat(timespec="seconds")
    # ``or []`` also covers an explicit JSON null, which .get(key, []) would not.
    source_candidates = source_report.get("change_evidence_candidates") or []
    readback_candidates = [build_candidate(item) for item in source_candidates]
    candidate_count = len(readback_candidates)
    write_capable_count = sum(
        1 for item in readback_candidates if item["write_capable_surface"]
    )
    live_required_count = sum(
        1 for item in readback_candidates if item["requires_live_evidence"]
    )
    return {
        "schema_version": "host_service_post_incident_readback_plan_v1",
        "generated_at": report_time,
        "git_commit": git_short_sha(root),
        "status": "post_incident_readback_plan_ready_no_runtime_action",
        "source_schema_version": source_report.get("schema_version"),
        "source_status": source_report.get("status"),
        "source_paths": [
            "docs/security/HOST-SERVICE-CHANGE-EVIDENCE-ACCEPTANCE.md",
            "docs/security/host-service-change-evidence-acceptance.snapshot.json",
            "docs/security/HOST-SERVICE-OWNER-RESPONSE-ACCEPTANCE.md",
            "docs/security/host-service-owner-response-acceptance.snapshot.json",
        ],
        "summary": {
            "readback_candidate_count": candidate_count,
            "write_capable_readback_candidate_count": write_capable_count,
            "live_evidence_required_readback_candidate_count": live_required_count,
            # These three reviews are counted for every candidate.
            "recovery_health_impact_review_required_candidate_count": candidate_count,
            "cross_project_sync_required_candidate_count": candidate_count,
            "no_false_green_required_candidate_count": candidate_count,
            "readback_field_count": len(READBACK_FIELDS),
            "required_readback_field_count": len(REQUIRED_READBACK_FIELDS),
            "reviewer_check_count": len(REVIEWER_CHECKS),
            "outcome_lane_count": len(OUTCOME_LANES),
            "blocked_action_count": len(BLOCKED_ACTIONS),
            # Received / accepted counters are literal zeros in the plan.
            "post_incident_readback_received_count": 0,
            "post_incident_readback_accepted_count": 0,
            "actor_attribution_accepted_count": 0,
            "before_after_state_accepted_count": 0,
            "docker_daemon_state_accepted_count": 0,
            "compose_stack_state_accepted_count": 0,
            "systemd_unit_state_accepted_count": 0,
            "failed_unit_review_accepted_count": 0,
            "port_binding_state_accepted_count": 0,
            "dependency_impact_accepted_count": 0,
            "public_route_recovery_accepted_count": 0,
            "admin_route_recovery_accepted_count": 0,
            "agent_provider_health_accepted_count": 0,
            "monitoring_alert_accepted_count": 0,
            "operator_notification_accepted_count": 0,
            "cross_project_sync_accepted_count": 0,
            "restoration_evidence_accepted_count": 0,
            "postcheck_readback_accepted_count": 0,
            "recurrence_guard_accepted_count": 0,
            "no_false_green_accepted_count": 0,
            "runtime_gate_count": 0,
            "action_button_count": 0,
            # NOTE(review): hard-coded value; its derivation is not visible here.
            "coverage_percent_after_readback_plan": 64,
        },
        # Copies, so callers mutating the report cannot alter the constants.
        "required_readback_fields": list(REQUIRED_READBACK_FIELDS),
        "reviewer_checks": [dict(item) for item in REVIEWER_CHECKS],
        "outcome_lanes": [dict(item) for item in OUTCOME_LANES],
        "blocked_actions": list(BLOCKED_ACTIONS),
        "readback_candidates": readback_candidates,
        "boundaries": {
            "not_authorization": True,
            "ssh_read_authorized": False,
            "ssh_write_authorized": False,
            "live_host_read_authorized": False,
            "docker_action_authorized": False,
            "docker_restart_authorized": False,
            "docker_kill_authorized": False,
            "docker_start_authorized": False,
            "docker_compose_action_authorized": False,
            "systemctl_action_authorized": False,
            "systemctl_restart_authorized": False,
            "repair_bot_execution_authorized": False,
            "ansible_apply_authorized": False,
            "route_smoke_authorized": False,
            "public_gateway_reload_authorized": False,
            "nginx_reload_authorized": False,
            "active_scan_authorized": False,
            "secret_value_collection_allowed": False,
            "raw_log_or_config_storage_allowed": False,
            "runtime_execution_authorized": False,
            "production_write_authorized": False,
            "action_buttons_allowed": False,
        },
    }


def main() -> int:
    """Parse CLI arguments, build the report and write it as JSON.

    The source snapshot and the output path are resolved relative to
    ``--root``.  The output's parent directory is created when missing.
    A one-line status summary is printed to stdout.

    Returns:
        ``0`` on success; read or parse failures propagate as exceptions.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--root", default=".")
    parser.add_argument(
        "--source-change-evidence-report",
        default="docs/security/host-service-change-evidence-acceptance.snapshot.json",
    )
    parser.add_argument(
        "--output",
        default="docs/security/host-service-post-incident-readback-plan.snapshot.json",
    )
    parser.add_argument("--generated-at")
    args = parser.parse_args()

    root = Path(args.root).resolve()
    source_report = load_json(root / args.source_change_evidence_report)
    report = build_report(root, source_report, args.generated_at)

    # sort_keys keeps the snapshot's key order stable between runs.
    payload = json.dumps(report, ensure_ascii=False, indent=2, sort_keys=True)
    output_path = root / args.output
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(payload + "\n", encoding="utf-8")

    summary = report["summary"]
    print(
        "HOST_SERVICE_POST_INCIDENT_READBACK_PLAN_OK "
        f"candidates={summary['readback_candidate_count']} "
        f"checks={summary['reviewer_check_count']} "
        f"lanes={summary['outcome_lane_count']} "
        f"accepted={summary['post_incident_readback_accepted_count']} "
        f"runtime_gate={summary['runtime_gate_count']}"
    )
    return 0


if __name__ == "__main__":
    sys.exit(main())