diff --git a/apps/api/src/services/ai_agent_log_controlled_writeback_executor_readback.py b/apps/api/src/services/ai_agent_log_controlled_writeback_executor_readback.py index 8a79f677..5c3a9ae3 100644 --- a/apps/api/src/services/ai_agent_log_controlled_writeback_executor_readback.py +++ b/apps/api/src/services/ai_agent_log_controlled_writeback_executor_readback.py @@ -368,6 +368,10 @@ def _current_blocker_queue_item(recovery: dict[str, Any]) -> dict[str, Any]: "node_load_high" if node_load_classifier == "high_load" else "" ) log_source_tags = _current_blocker_log_source_tags(recovery) + safe_next_action = _current_blocker_safe_next_action( + recovery, + control_path_pressure_blocker=control_path_pressure_blocker, + ) return { "queue_item_id": f"current-p0-blocker::{blocker_id}", "source_sample_id": source_sample_id, @@ -441,6 +445,15 @@ def _current_blocker_queue_item(recovery: dict[str, Any]) -> dict[str, Any]: recovery.get("forbidden_runtime_actions") ), "safe_next_step": str(recovery.get("safe_next_step") or ""), + "safe_next_action_id": safe_next_action["action_id"], + "safe_next_action_stage": safe_next_action["stage"], + "safe_next_action": safe_next_action["action"], + "safe_next_action_command": safe_next_action["command"], + "safe_next_action_post_verifier": safe_next_action["post_verifier"], + "safe_next_action_requires_local_console": safe_next_action[ + "requires_local_console" + ], + "safe_next_action_blocker_fields": safe_next_action["blocker_fields"], "runtime_write_gate": "controlled_after_110_local_console_preflight", "runtime_apply_required_on_110_local_console": bool( recovery.get("runtime_apply_required_on_110_local_console") is True @@ -456,6 +469,90 @@ def _current_blocker_queue_item(recovery: dict[str, Any]) -> dict[str, Any]: } +def _current_blocker_safe_next_action( + recovery: dict[str, Any], + *, + control_path_pressure_blocker: str, +) -> dict[str, Any]: + blocker_id = str(recovery.get("blocker_id") or "") + external_blocker = str(recovery.get("external_control_path_blocker") or "") + deploy_marker_required = recovery.get("deploy_marker_readback_required") is True + cd_failed_after_registry_ready = ( + recovery.get("cd_failed_after_registry_ready") is True + ) + harbor_repair_failed_after_registry_ready = ( + recovery.get("harbor_110_repair_failed_after_registry_ready") is True + ) + + if blocker_id == "harbor_110_remote_ssh_publickey_auth_stalled": + return { + "action_id": ( + "run_110_local_ssh_session_control_path_recovery_then_verify_cd_and_deploy_marker_readback" + ), + "stage": "local_console_control_path_receipt_required", + "action": ( + "Run 110 local-console SSH/session check, then verify registry, " + "queue, CD, and deploy-marker closure." + ), + "command": "recover-110-control-path-and-harbor-local.sh --check", + "post_verifier": ( + "read-public-gitea-actions-queue.py --json && " + "check-awoooi-110-controlled-cd-lane-readiness.sh" + ), + "requires_local_console": True, + "blocker_fields": [ + "latest_visible_harbor_110_repair_remote_ssh_publickey_auth_stalled", + external_blocker, + ], + } + + if control_path_pressure_blocker: + return { + "action_id": "wait_host_pressure_gate_then_rerun_110_control_path_readback", + "stage": "host_pressure_gate_wait", + "action": ( + "Keep host pressure protection fail-closed, then rerun 110 " + "control-path and queue readbacks." + ), + "command": "awoooi-wait-host-web-build-pressure.sh", + "post_verifier": "read-public-gitea-actions-queue.py --json", + "requires_local_console": False, + "blocker_fields": [control_path_pressure_blocker], + } + + if ( + deploy_marker_required + or cd_failed_after_registry_ready + or harbor_repair_failed_after_registry_ready + ): + return { + "action_id": "verify_registry_ready_then_close_cd_deploy_marker_readback", + "stage": "deploy_marker_closure_after_registry_ready", + "action": ( + "Keep registry 200/401 readiness separate from CD/deploy-marker " + "closure, then verify production priority readback." + ), + "command": "read-public-gitea-actions-queue.py --json", + "post_verifier": "awoooi production deploy marker readback", + "requires_local_console": False, + "blocker_fields": [ + "deploy_marker_readback_required_after_registry_ready", + "current_cd_failure_after_registry_ready", + "harbor_110_repair_failure_after_registry_ready", + ], + } + + return { + "action_id": "continue_ai_loop_current_blocker_readback", + "stage": "ai_loop_current_blocker_readback", + "action": "Continue metadata-only current blocker readback and writeback.", + "command": "read-public-gitea-actions-queue.py --json", + "post_verifier": "priority work-order readback", + "requires_local_console": False, + "blocker_fields": [external_blocker] if external_blocker else [], + } + + def _log_source_tagging_contract() -> list[dict[str, Any]]: metadata_boundary = { "metadata_only": True, @@ -895,6 +992,20 @@ def _queue_readback_normalizer_contract() -> list[dict[str, Any]]: ], "learning_targets": ["km", "rag", "playbook", "mcp", "verifier", "ai_agent"], }, + { + "field_id": "safe_next_action_id", + "purpose": ( + "publish one machine-readable next action from queue classifiers " + "so AI Loop does not treat stale jobs API payloads, runner waits, " + "or 110 SSH/session blockers as generic manual triage" + ), + "writes_blockers": [ + "gitea_queue_safe_next_action_missing", + "gitea_queue_safe_next_action_requires_local_console", + "gitea_queue_safe_next_action_stale_payload_quarantined", + ], + "learning_targets": ["km", "rag", "playbook", "mcp", "verifier", "ai_agent"], + }, { "field_id": "controlled_profile_no_matching_runner_labels", "purpose": ( diff --git a/apps/api/src/services/awoooi_priority_work_order_readback.py b/apps/api/src/services/awoooi_priority_work_order_readback.py index 2d4a4bbd..48263c25 100644 --- a/apps/api/src/services/awoooi_priority_work_order_readback.py +++ b/apps/api/src/services/awoooi_priority_work_order_readback.py @@ -579,6 +579,19 @@ def apply_ai_loop_current_blocker_execution_queue( pressure_blocker = str(first_item.get("control_path_pressure_blocker") or "") node_load_classifier = str(first_item.get("node_load_classifier") or "") runtime_write_gate = str(first_item.get("runtime_write_gate") or "") + safe_next_action_id = str(first_item.get("safe_next_action_id") or "") + safe_next_action_stage = str(first_item.get("safe_next_action_stage") or "") + safe_next_action = str(first_item.get("safe_next_action") or "") + safe_next_action_command = str(first_item.get("safe_next_action_command") or "") + safe_next_action_post_verifier = str( + first_item.get("safe_next_action_post_verifier") or "" + ) + safe_next_action_requires_local_console = bool( + first_item.get("safe_next_action_requires_local_console") is True + ) + safe_next_action_blocker_fields = _strings( + first_item.get("safe_next_action_blocker_fields") + ) registry_v2_ready = bool(first_item.get("registry_v2_ready") is True) registry_v2_status_classifier = str( first_item.get("registry_v2_status_classifier") or "" @@ -648,6 +661,16 @@ def apply_ai_loop_current_blocker_execution_queue( ) state["ai_loop_current_blocker_node_load_classifier"] = node_load_classifier state["ai_loop_current_blocker_runtime_write_gate"] = runtime_write_gate + state["ai_loop_current_blocker_safe_next_action_id"] = safe_next_action_id + state["ai_loop_current_blocker_safe_next_action_stage"] = ( + safe_next_action_stage + ) + state["ai_loop_current_blocker_safe_next_action_requires_local_console"] = ( + safe_next_action_requires_local_console + ) + state["ai_loop_current_blocker_safe_next_action_blocker_fields"] = ( + safe_next_action_blocker_fields + ) state["ai_loop_current_blocker_learning_target_count"] = len(learning_targets) state["ai_loop_current_blocker_local_console_phase_count"] = len( local_console_plan @@ -760,6 +783,25 @@ def apply_ai_loop_current_blocker_execution_queue( post_apply_verifier ) evidence["ai_loop_current_blocker_runtime_write_gate"] = runtime_write_gate + evidence["ai_loop_current_blocker_safe_next_action_id"] = ( + safe_next_action_id + ) + evidence["ai_loop_current_blocker_safe_next_action_stage"] = ( + safe_next_action_stage + ) + evidence["ai_loop_current_blocker_safe_next_action"] = safe_next_action + evidence["ai_loop_current_blocker_safe_next_action_command"] = ( + safe_next_action_command + ) + evidence["ai_loop_current_blocker_safe_next_action_post_verifier"] = ( + safe_next_action_post_verifier + ) + evidence["ai_loop_current_blocker_safe_next_action_requires_local_console"] = ( + safe_next_action_requires_local_console + ) + evidence["ai_loop_current_blocker_safe_next_action_blocker_fields"] = ( + safe_next_action_blocker_fields + ) evidence["ai_loop_current_blocker_control_path_blocker"] = external_blocker evidence["ai_loop_current_blocker_control_path_pressure_blocker"] = ( pressure_blocker @@ -937,6 +979,16 @@ def apply_ai_loop_current_blocker_execution_queue( controlled_recovery_package ) summary["ai_loop_current_blocker_post_apply_verifier"] = post_apply_verifier + summary["ai_loop_current_blocker_safe_next_action_id"] = safe_next_action_id + summary["ai_loop_current_blocker_safe_next_action_stage"] = ( + safe_next_action_stage + ) + summary["ai_loop_current_blocker_safe_next_action_requires_local_console"] = ( + safe_next_action_requires_local_console + ) + summary["ai_loop_current_blocker_safe_next_action_blocker_fields"] = ( + safe_next_action_blocker_fields + ) summary["ai_loop_current_blocker_learning_target_count"] = len(learning_targets) summary["ai_loop_current_blocker_local_console_phase_count"] = len( local_console_plan diff --git a/apps/api/tests/test_ai_agent_log_controlled_writeback_executor_readback_api.py b/apps/api/tests/test_ai_agent_log_controlled_writeback_executor_readback_api.py index ee724526..a6b89d7b 100644 --- a/apps/api/tests/test_ai_agent_log_controlled_writeback_executor_readback_api.py +++ b/apps/api/tests/test_ai_agent_log_controlled_writeback_executor_readback_api.py @@ -233,6 +233,16 @@ def _assert_executor_readback(payload: dict, *, public_endpoint: bool = False): "expected_schema" ] == "awoooi_production_deploy_readback_blocker_v1" assert current_queue[0]["harbor_recovery_receipt_output_contract_count"] == 8 + assert current_queue[0]["safe_next_action_id"] == ( + "run_110_local_ssh_session_control_path_recovery_then_verify_cd_and_deploy_marker_readback" + ) + assert current_queue[0]["safe_next_action_stage"] == ( + "local_console_control_path_receipt_required" + ) + assert current_queue[0]["safe_next_action_requires_local_console"] is True + assert current_queue[0]["safe_next_action_command"] == ( + "recover-110-control-path-and-harbor-local.sh --check" + ) assert [ item["output_id"] for item in current_queue[0]["harbor_recovery_receipt_output_contract"] @@ -260,7 +270,7 @@ def _assert_executor_readback(payload: dict, *, public_endpoint: bool = False): ] for item in current_queue[0]["harbor_recovery_receipt_output_contract"] ) - assert current_queue[0]["queue_readback_normalizer_contract_count"] == 14 + assert current_queue[0]["queue_readback_normalizer_contract_count"] == 15 assert [ item["field_id"] for item in current_queue[0]["queue_readback_normalizer_contract"] @@ -278,6 +288,7 @@ def _assert_executor_readback(payload: dict, *, public_endpoint: bool = False): "harbor_110_repair_failure_after_registry_ready", "harbor_110_repair_visible_running_jobs_api_stale", "current_cd_waiting_behind_harbor_110_repair_running", + "safe_next_action_id", "controlled_profile_no_matching_runner_labels", ] assert "gitea_queue_cd_jobs_stale_or_mismatched" in current_queue[0][ diff --git a/apps/api/tests/test_awoooi_priority_work_order_readback_api.py b/apps/api/tests/test_awoooi_priority_work_order_readback_api.py index 4cfd7d55..d07d9216 100644 --- a/apps/api/tests/test_awoooi_priority_work_order_readback_api.py +++ b/apps/api/tests/test_awoooi_priority_work_order_readback_api.py @@ -419,6 +419,21 @@ def test_awoooi_priority_work_order_readback_overlays_ai_loop_current_blocker_qu assert evidence["ai_loop_current_blocker_harbor_recovery_receipt_output_contract"][ 0 ]["writeback_targets"] == ["km", "rag", "playbook", "mcp", "verifier", "ai_agent"] + assert evidence["ai_loop_current_blocker_safe_next_action_id"] == ( + "run_110_local_ssh_session_control_path_recovery_then_verify_cd_and_deploy_marker_readback" + ) + assert evidence["ai_loop_current_blocker_safe_next_action_stage"] == ( + "local_console_control_path_receipt_required" + ) + assert ( + evidence[ + "ai_loop_current_blocker_safe_next_action_requires_local_console" + ] + is True + ) + assert evidence["ai_loop_current_blocker_safe_next_action_command"] == ( + "recover-110-control-path-and-harbor-local.sh --check" + ) assert evidence["ai_loop_current_blocker_queue_readback_normalizer_field_ids"] == [ "cd_run_jobs_payload_classifier", "harbor_110_repair_jobs_payload_classifier", @@ -433,6 +448,7 @@ def test_awoooi_priority_work_order_readback_overlays_ai_loop_current_blocker_qu "harbor_110_repair_failure_after_registry_ready", "harbor_110_repair_visible_running_jobs_api_stale", "current_cd_waiting_behind_harbor_110_repair_running", + "safe_next_action_id", "controlled_profile_no_matching_runner_labels", ] assert evidence["ai_loop_current_blocker_queue_readback_normalizer_contract"][0][ @@ -519,6 +535,12 @@ def test_awoooi_priority_work_order_readback_overlays_ai_loop_current_blocker_qu "load_not_high" ) assert payload["summary"]["ai_loop_current_blocker_local_console_phase_count"] == 5 + assert payload["summary"]["ai_loop_current_blocker_safe_next_action_id"] == ( + "run_110_local_ssh_session_control_path_recovery_then_verify_cd_and_deploy_marker_readback" + ) + assert payload["summary"][ + "ai_loop_current_blocker_safe_next_action_requires_local_console" + ] is True assert payload["summary"][ "ai_loop_current_blocker_post_recovery_readback_command_count" ] == 3 @@ -547,7 +569,7 @@ def test_awoooi_priority_work_order_readback_overlays_ai_loop_current_blocker_qu payload["summary"][ "ai_loop_current_blocker_queue_readback_normalizer_contract_count" ] - == 14 + == 15 ) assert payload["summary"][ "ai_loop_current_blocker_queue_readback_normalizer_field_ids" diff --git a/apps/web/messages/en.json b/apps/web/messages/en.json index 3726c83e..3bbb27a7 100644 --- a/apps/web/messages/en.json +++ b/apps/web/messages/en.json @@ -9061,6 +9061,9 @@ "queue": "Queue fields" }, "queueFields": "Queue normalizer fields", + "safeNextAction": "Next AI action", + "safeNextStage": "Stage: {stage} · local console: {local}", + "safeNextCommand": "Command: {command}", "receipt": { "inputs": "Receipt inputs", "outputs": "Receipt outputs" diff --git a/apps/web/messages/zh-TW.json b/apps/web/messages/zh-TW.json index 94bb026f..77c3baac 100644 --- a/apps/web/messages/zh-TW.json +++ b/apps/web/messages/zh-TW.json @@ -9061,6 +9061,9 @@ "queue": "Queue fields" }, "queueFields": "Queue normalizer fields", + "safeNextAction": "下一步 AI action", + "safeNextStage": "Stage:{stage} · local console:{local}", + "safeNextCommand": "Command:{command}", "receipt": { "inputs": "Receipt inputs", "outputs": "Receipt outputs" diff --git a/apps/web/src/app/[locale]/awooop/work-items/page.tsx b/apps/web/src/app/[locale]/awooop/work-items/page.tsx index 6529dacd..4fb68980 100644 --- a/apps/web/src/app/[locale]/awooop/work-items/page.tsx +++ b/apps/web/src/app/[locale]/awooop/work-items/page.tsx @@ -1037,6 +1037,9 @@ type PriorityWorkOrderResponse = { ai_loop_current_blocker_harbor_recovery_receipt_output_ids?: string[] | null; ai_loop_current_blocker_queue_readback_normalizer_contract_count?: number | null; ai_loop_current_blocker_queue_readback_normalizer_field_ids?: string[] | null; + ai_loop_current_blocker_safe_next_action_id?: string | null; + ai_loop_current_blocker_safe_next_action_stage?: string | null; + ai_loop_current_blocker_safe_next_action_requires_local_console?: boolean | null; ai_loop_log_source_grouping_key_count?: number | null; ai_loop_log_source_grouping_keys?: string[] | null; ai_loop_log_source_tagging_contract_count?: number | null; @@ -1048,6 +1051,12 @@ type PriorityWorkOrderResponse = { ai_loop_current_blocker_harbor_recovery_receipt_input_ids?: string[] | null; ai_loop_current_blocker_harbor_recovery_receipt_output_ids?: string[] | null; ai_loop_current_blocker_queue_readback_normalizer_field_ids?: string[] | null; + ai_loop_current_blocker_safe_next_action_id?: string | null; + ai_loop_current_blocker_safe_next_action_stage?: string | null; + ai_loop_current_blocker_safe_next_action?: string | null; + ai_loop_current_blocker_safe_next_action_command?: string | null; + ai_loop_current_blocker_safe_next_action_post_verifier?: string | null; + ai_loop_current_blocker_safe_next_action_requires_local_console?: boolean | null; } | null; }>; }; @@ -7805,6 +7814,20 @@ function AiLoopLogSourceTagsPanel({ summary?.ai_loop_current_blocker_queue_readback_normalizer_field_ids ?? evidence?.ai_loop_current_blocker_queue_readback_normalizer_field_ids ?? []; + const safeNextActionId = + summary?.ai_loop_current_blocker_safe_next_action_id ?? + evidence?.ai_loop_current_blocker_safe_next_action_id ?? + ""; + const safeNextActionStage = + summary?.ai_loop_current_blocker_safe_next_action_stage ?? + evidence?.ai_loop_current_blocker_safe_next_action_stage ?? + ""; + const safeNextActionCommand = + evidence?.ai_loop_current_blocker_safe_next_action_command ?? ""; + const safeNextRequiresLocalConsole = + summary?.ai_loop_current_blocker_safe_next_action_requires_local_console ?? + evidence?.ai_loop_current_blocker_safe_next_action_requires_local_console ?? + false; const labelMap: Record = { project_id: t("tagLabels.projectId"), product: t("tagLabels.product"), @@ -7899,6 +7922,17 @@ function AiLoopLogSourceTagsPanel({ }), tone: "border-[#cbd7bf] bg-[#f4faef] text-[#3d6b24]", }, + { + key: "safe-next-action", + icon: ArrowRight, + label: t("safeNextAction"), + value: safeNextActionId || "--", + detail: t("safeNextStage", { + stage: safeNextActionStage || "--", + local: String(Boolean(safeNextRequiresLocalConsole)), + }), + tone: "border-[#c9d8ea] bg-[#eef5ff] text-[#1f5b9b]", + }, ]; return ( @@ -7921,7 +7955,7 @@ function AiLoopLogSourceTagsPanel({

{t("subtitle")}

-
+
{visualCards.map((card) => { const Icon = card.icon; return ( @@ -7941,6 +7975,11 @@ function AiLoopLogSourceTagsPanel({
{loading ? "--" : card.value}
+ {"detail" in card ? ( +
+ {loading ? "--" : card.detail} +
+ ) : null}
); })} @@ -7963,6 +8002,11 @@ function AiLoopLogSourceTagsPanel({ ); })}
+ {safeNextActionCommand ? ( +
+ {t("safeNextCommand", { command: safeNextActionCommand })} +
+ ) : null}
{receiptMetrics.map((metric) => (
dict[str, Any]: + forbidden = [ + "read_runner_token_or_runner_file", + "restart_docker_daemon", + "reboot_host", + "node_drain", + "workflow_dispatch", + "force_push_or_ref_delete", + "raw_secret_volume_read", + ] + + def action( + *, + action_id: str, + stage: str, + text: str, + reason: str, + command: str, + post_verifier: str, + blocker_fields: list[str], + requires_local_console: bool = False, + ) -> dict[str, Any]: + return { + "action_id": action_id, + "stage": stage, + "action": text, + "reason": reason, + "command": command, + "post_verifier": post_verifier, + "requires_local_console": requires_local_console, + "metadata_only": True, + "blocker_fields": blocker_fields, + "forbidden_actions": forbidden, + } + + if remote_ssh_publickey_auth_stalled: + blocker_fields = ["latest_visible_harbor_110_repair_remote_ssh_publickey_auth_stalled"] + if remote_ssh_publickey_offer_timeout: + blocker_fields.append( + "latest_visible_harbor_110_repair_remote_ssh_publickey_offer_timeout" + ) + if remote_ssh_server_accepts_key_then_session_timeout: + blocker_fields.append( + "latest_visible_harbor_110_repair_remote_ssh_server_accepts_key_then_session_timeout" + ) + return action( + action_id=( + "run_110_local_ssh_session_control_path_recovery_then_verify_cd_and_deploy_marker_readback" + ), + stage="local_console_control_path_receipt_required", + text=( + "Use the 110 local-console controlled recovery package to verify " + "SSH account/session metadata, then rerun registry, queue, CD, " + "and deploy-marker readbacks." + ), + reason=( + "The public queue shows TCP/SSH reachability but publickey " + "authentication or session setup stalls; remote SSH cannot safely " + "repair itself." + ), + command="recover-110-control-path-and-harbor-local.sh --check", + post_verifier=( + "read-public-gitea-actions-queue.py --json && " + "check-awoooi-110-controlled-cd-lane-readiness.sh && " + "curl -k https://registry.wooo.work/v2/" + ), + blocker_fields=blocker_fields, + requires_local_console=True, + ) + + if remote_control_channel_unavailable: + return action( + action_id="restore_110_remote_control_channel_readback_from_local_console", + stage="local_console_control_channel_readback_required", + text=( + "Verify the 110 local control path and publish a metadata-only " + "receipt before retrying Harbor or CD closure." + ), + reason="The bounded remote SSH control channel is unavailable.", + command="recover-110-control-path-and-harbor-local.sh --check", + post_verifier="read-public-gitea-actions-queue.py --json", + blocker_fields=[ + "latest_visible_harbor_110_repair_remote_control_channel_unavailable" + ], + requires_local_console=True, + ) + + if current_cd_waiting_behind_harbor_110_repair_running: + blocker_fields = ["current_cd_waiting_behind_harbor_110_repair_running"] + if harbor_110_repair_visible_running_jobs_api_stale: + blocker_fields.append("harbor_110_repair_visible_running_jobs_api_stale") + return action( + action_id=( + "refresh_harbor_110_repair_log_truth_then_verify_cd_waiting_state" + ), + stage="queue_truth_refresh_required", + text=( + "Keep the visible CD wait state, ignore stale Harbor jobs API " + "payloads, and refresh the Harbor repair log plus queue readback." + ), + reason=( + "Current CD is waiting behind the visible Harbor repair lane, " + "while the jobs API may belong to another workflow or stale run." + ), + command="read-public-gitea-actions-queue.py --json", + post_verifier="read-public-gitea-actions-queue.py --json", + blocker_fields=blocker_fields, + ) + + if latest_cd_waiting or latest_cd_no_matching_runner_label: + blocker_fields = ["latest_visible_cd_run_waiting"] + if latest_cd_no_matching_runner_label: + blocker_fields.append("latest_visible_cd_no_matching_runner_label") + return action( + action_id="verify_non110_runner_lane_before_retrying_current_cd", + stage="non110_runner_lane_readiness_required", + text=( + "Verify the non-110 controlled CD runner lane and keep legacy or " + "generic runner labels closed." + ), + reason=( + f"The latest visible CD run is {latest_cd_status or 'unknown'} " + "or has no matching controlled runner label." + ), + command="check-awoooi-non110-runner-readiness.sh", + post_verifier="read-public-gitea-actions-queue.py --json", + blocker_fields=blocker_fields, + ) + + if harbor_110_repair_no_matching_runner_label: + return action( + action_id="verify_awoooi_host_controlled_repair_runner_lane", + stage="awoooi_host_runner_lane_readiness_required", + text=( + "Verify the awoooi-host controlled repair lane before retrying " + "Harbor local repair." + ), + reason="The Harbor 110 repair workflow has no matching controlled runner.", + command="check-awoooi-110-controlled-cd-lane-readiness.sh", + post_verifier="read-public-gitea-actions-queue.py --json", + blocker_fields=[ + "latest_visible_harbor_110_repair_no_matching_runner_label" + ], + ) + + if harbor_110_repair_waiting or harbor_110_repair_waiting_after_cd_harbor_blocker: + blocker_fields = ["latest_visible_harbor_110_repair_waiting"] + if harbor_110_repair_jobs_stale_or_mismatched: + blocker_fields.append("harbor_110_repair_jobs_stale_or_mismatched") + return action( + action_id="wait_for_harbor_110_repair_or_refresh_queue_truth", + stage="harbor_110_repair_queue_wait", + text=( + "Wait for the visible Harbor 110 repair lane or refresh queue truth " + "when the jobs API is stale." + ), + reason="The Harbor 110 repair workflow is visible but not complete.", + command="read-public-gitea-actions-queue.py --json", + post_verifier="read-public-gitea-actions-queue.py --json", + blocker_fields=blocker_fields, + ) + + if harbor_110_repair_failed: + blocker_fields = ["latest_visible_harbor_110_repair_failed"] + if harbor_110_repair_visible_failure_jobs_api_stale: + blocker_fields.append("harbor_110_repair_visible_failure_jobs_api_stale") + if harbor_110_repair_jobs_payload_classifier: + blocker_fields.append("harbor_110_repair_jobs_payload_classifier") + return action( + action_id="use_harbor_repair_log_classifier_then_submit_recovery_receipt", + stage="harbor_110_repair_failure_receipt_required", + text=( + "Use the Harbor repair log classifier as truth, quarantine stale " + "jobs API payloads, and submit the metadata-only recovery receipt." + ), + reason="The visible Harbor repair run failed.", + command="read-public-gitea-actions-queue.py --json", + post_verifier="harbor-registry-controlled-recovery-receipt readback", + blocker_fields=blocker_fields, + ) + + if build_harbor_public_route_blocked or build_harbor_public_route_retrying_unavailable: + return action( + action_id="run_harbor_registry_v2_verifier_before_repair", + stage="registry_v2_verifier_required", + text=( + "Verify public registry /v2/ first; only use Harbor repair if the " + "route is still below 200/401." + ), + reason="The CD log still carries Harbor public route unavailable evidence.", + command="curl -k https://registry.wooo.work/v2/", + post_verifier="read-public-gitea-actions-queue.py --json", + blocker_fields=[ + "latest_visible_cd_harbor_public_route_blocked", + "latest_visible_cd_harbor_public_route_retrying_unavailable", + ], + ) + + if effective_host_pressure_blocked_or_waiting: + return action( + action_id="wait_host_pressure_gate_then_rerun_cd_readback", + stage="host_pressure_gate_wait", + text=( + "Keep the host pressure gate fail-closed and rerun CD readback " + "after pressure clears." + ), + reason=effective_host_pressure_classifier or "host pressure is active", + command="awoooi-wait-host-web-build-pressure.sh", + post_verifier="read-public-gitea-actions-queue.py --json", + blocker_fields=["latest_visible_cd_host_pressure_classifier"], + ) + + if cd_jobs_stale_or_mismatched: + return action( + action_id="ignore_stale_cd_jobs_api_payload_and_poll_visible_cd_or_marker", + stage="cd_jobs_api_stale_payload_quarantine", + text=( + "Ignore stale CD jobs API payloads, poll the visible CD run/logs, " + "and verify production deploy marker before closing." + ), + reason=cd_jobs_payload_classifier or "CD jobs API payload is stale", + command="read-public-gitea-actions-queue.py --json", + post_verifier="awoooi production deploy marker readback", + blocker_fields=["cd_run_jobs_payload_classifier"], + ) + + if harbor_110_repair_jobs_stale_or_mismatched: + return action( + action_id="ignore_stale_harbor_jobs_api_payload_and_poll_visible_repair_log", + stage="harbor_jobs_api_stale_payload_quarantine", + text=( + "Ignore stale Harbor repair jobs API payloads and poll the visible " + "repair log or queue status." + ), + reason=harbor_110_repair_jobs_payload_classifier + or "Harbor repair jobs API payload is stale", + command="read-public-gitea-actions-queue.py --json", + post_verifier="read-public-gitea-actions-queue.py --json", + blocker_fields=["harbor_110_repair_jobs_payload_classifier"], + ) + + return action( + action_id="continue_public_gitea_queue_readback", + stage="queue_observation", + text="Continue public queue readback without credentials or runtime writes.", + reason="No more specific queue blocker was visible in the public readback.", + command="read-public-gitea-actions-queue.py --json", + post_verifier="read-public-gitea-actions-queue.py --json", + blocker_fields=[], + ) + + def classify_cd_build_log(text: str) -> dict[str, Any]: attempt_statuses: list[str] = [] attempt_numbers: list[int] = [] @@ -1416,6 +1770,9 @@ def _human_summary(payload: dict[str, Any]) -> str: "CURRENT_CD_WAITING_BEHIND_HARBOR_110_REPAIR_RUNNING=" f"{int(readback['current_cd_waiting_behind_harbor_110_repair_running'])}" ), + f"SAFE_NEXT_ACTION_ID={readback['safe_next_action_id']}", + f"SAFE_NEXT_ACTION_STAGE={readback['safe_next_action_stage']}", + f"SAFE_NEXT_ACTION_COMMAND={readback['safe_next_action_command']}", "WRITE_PERFORMED=false", "TOKEN_COLLECTED=false", ] diff --git a/ops/runner/test_read_public_gitea_actions_queue.py b/ops/runner/test_read_public_gitea_actions_queue.py index a6fab864..ef133f39 100644 --- a/ops/runner/test_read_public_gitea_actions_queue.py +++ b/ops/runner/test_read_public_gitea_actions_queue.py @@ -657,6 +657,18 @@ def test_harbor_ssh_blocker_takes_precedence_over_current_cd_waiting() -> None: payload["rollups"]["harbor_110_repair_remote_ssh_publickey_auth_stalled"] is True ) + assert payload["readback"]["safe_next_action_id"] == ( + "run_110_local_ssh_session_control_path_recovery_then_verify_cd_and_deploy_marker_readback" + ) + assert payload["readback"]["safe_next_action_stage"] == ( + "local_console_control_path_receipt_required" + ) + assert payload["readback"]["safe_next_action_requires_local_console"] is True + assert payload["readback"]["safe_next_action_metadata_only"] is True + assert "latest_visible_harbor_110_repair_remote_ssh_publickey_auth_stalled" in ( + payload["readback"]["safe_next_action_blocker_fields"] + ) + assert payload["rollups"]["safe_next_action_requires_local_console"] is True def test_build_readback_classifies_harbor_502_after_110_repair_jobs_success() -> None: @@ -920,6 +932,7 @@ def test_build_readback_classifies_harbor_repair_publickey_auth_stalled() -> Non "LATEST_VISIBLE_HARBOR_110_REPAIR_REMOTE_SSH_AUTH_PERMISSION_DENIED=False" in summary ) + assert "SAFE_NEXT_ACTION_STAGE=local_console_control_path_receipt_required" in summary assert payload["operation_boundaries"]["secret_or_runner_token_read"] is False assert payload["operation_boundaries"]["host_write_performed"] is False @@ -1280,6 +1293,15 @@ def test_build_readback_flags_stale_cd_jobs_api_payload() -> None: assert payload["readback"]["cd_run_jobs_payload_classifier"] == ( "cd_jobs_api_head_sha_mismatch_for_visible_cd_run" ) + assert payload["readback"]["safe_next_action_id"] == ( + "ignore_stale_cd_jobs_api_payload_and_poll_visible_cd_or_marker" + ) + assert payload["readback"]["safe_next_action_stage"] == ( + "cd_jobs_api_stale_payload_quarantine" + ) + assert payload["readback"]["safe_next_action_blocker_fields"] == [ + "cd_run_jobs_payload_classifier" + ] assert payload["rollups"]["cd_run_jobs_stale_or_mismatched"] is True assert payload["rollups"]["cd_run_jobs_payload_classifier"] == ( "cd_jobs_api_head_sha_mismatch_for_visible_cd_run"