fix(api): clear resolved ai loop control blocker
Some checks failed
CD Pipeline / workflow-shape (push) Successful in 1s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / build-and-deploy (push) Has been cancelled
CD Pipeline / post-deploy-checks (push) Has been cancelled
CD Pipeline / tests (push) Has been cancelled

This commit is contained in:
ogt
2026-07-02 00:23:02 +08:00
parent a15ab298ff
commit 97899ff5ad
3 changed files with 216 additions and 39 deletions

View File

@@ -510,6 +510,82 @@ def apply_ai_loop_current_blocker_execution_queue(
first_item = _dict(queue[0])
blocker_id = str(first_item.get("blocker_id") or "")
registry_v2_ready = bool(first_item.get("registry_v2_ready") is True)
registry_v2_status_classifier = str(
first_item.get("registry_v2_status_classifier") or ""
)
deployment_closure_state = str(first_item.get("deployment_closure_state") or "")
deploy_marker_readback_required = bool(
first_item.get("deploy_marker_readback_required") is True
)
current_cd_run_id = str(first_item.get("current_cd_run_id") or "")
current_cd_run_status = str(first_item.get("current_cd_run_status") or "")
current_cd_commit_sha = str(first_item.get("current_cd_commit_sha") or "")
cd_failed_after_registry_ready = bool(
first_item.get("cd_failed_after_registry_ready") is True
)
harbor_110_repair_run_id = str(first_item.get("harbor_110_repair_run_id") or "")
harbor_110_repair_run_status = str(
first_item.get("harbor_110_repair_run_status") or ""
)
harbor_110_repair_failure_classifier = str(
first_item.get("harbor_110_repair_failure_classifier") or ""
)
harbor_110_repair_failed_after_registry_ready = bool(
first_item.get("harbor_110_repair_failed_after_registry_ready") is True
)
external_blocker = str(first_item.get("external_control_path_blocker") or "")
pressure_blocker = str(first_item.get("control_path_pressure_blocker") or "")
node_load_classifier = str(first_item.get("node_load_classifier") or "")
state = _dict(payload.setdefault("mainline_execution_state", {}))
current_head = _dict(payload.get("current_head"))
production_readback_verified = bool(
state.get("current_main_cd_run_status") == "production_readback_verified"
and current_head.get("production_source_truth_available") is True
and _is_sha(str(state.get("latest_successful_deployed_source_sha") or ""))
)
deploy_marker_resolved_by_production_readback = bool(
production_readback_verified
and (deploy_marker_readback_required or cd_failed_after_registry_ready)
)
queue_resolved_by_production_readback = bool(
production_readback_verified
and registry_v2_ready
and (
deploy_marker_readback_required
or cd_failed_after_registry_ready
or harbor_110_repair_failed_after_registry_ready
)
)
if queue_resolved_by_production_readback:
_record_ai_loop_current_blocker_production_resolution(
payload=payload,
state=state,
queue_count=len(queue),
blocker_id=blocker_id,
registry_v2_ready=registry_v2_ready,
registry_v2_status_classifier=registry_v2_status_classifier,
deployment_closure_state=deployment_closure_state,
deploy_marker_readback_required=deploy_marker_readback_required,
current_cd_run_id=current_cd_run_id,
current_cd_run_status=current_cd_run_status,
current_cd_commit_sha=current_cd_commit_sha,
cd_failed_after_registry_ready=cd_failed_after_registry_ready,
harbor_110_repair_run_id=harbor_110_repair_run_id,
harbor_110_repair_run_status=harbor_110_repair_run_status,
harbor_110_repair_failure_classifier=(
harbor_110_repair_failure_classifier
),
harbor_110_repair_failed_after_registry_ready=(
harbor_110_repair_failed_after_registry_ready
),
external_blocker=external_blocker,
pressure_blocker=pressure_blocker,
node_load_classifier=node_load_classifier,
)
return
if not _ai_loop_current_blocker_can_override(
status=str(payload.get("status") or ""),
blocker_id=blocker_id,
@@ -575,9 +651,6 @@ def apply_ai_loop_current_blocker_execution_queue(
_dict(item) for item in _list(context.get("log_source_tagging_contract"))
]
forbidden_runtime_actions = _strings(first_item.get("forbidden_runtime_actions"))
external_blocker = str(first_item.get("external_control_path_blocker") or "")
pressure_blocker = str(first_item.get("control_path_pressure_blocker") or "")
node_load_classifier = str(first_item.get("node_load_classifier") or "")
runtime_write_gate = str(first_item.get("runtime_write_gate") or "")
safe_next_action_id = str(first_item.get("safe_next_action_id") or "")
safe_next_action_stage = str(first_item.get("safe_next_action_stage") or "")
@@ -592,42 +665,6 @@ def apply_ai_loop_current_blocker_execution_queue(
safe_next_action_blocker_fields = _strings(
first_item.get("safe_next_action_blocker_fields")
)
registry_v2_ready = bool(first_item.get("registry_v2_ready") is True)
registry_v2_status_classifier = str(
first_item.get("registry_v2_status_classifier") or ""
)
deployment_closure_state = str(first_item.get("deployment_closure_state") or "")
deploy_marker_readback_required = bool(
first_item.get("deploy_marker_readback_required") is True
)
current_cd_run_id = str(first_item.get("current_cd_run_id") or "")
current_cd_run_status = str(first_item.get("current_cd_run_status") or "")
current_cd_commit_sha = str(first_item.get("current_cd_commit_sha") or "")
cd_failed_after_registry_ready = bool(
first_item.get("cd_failed_after_registry_ready") is True
)
harbor_110_repair_run_id = str(first_item.get("harbor_110_repair_run_id") or "")
harbor_110_repair_run_status = str(
first_item.get("harbor_110_repair_run_status") or ""
)
harbor_110_repair_failure_classifier = str(
first_item.get("harbor_110_repair_failure_classifier") or ""
)
harbor_110_repair_failed_after_registry_ready = bool(
first_item.get("harbor_110_repair_failed_after_registry_ready") is True
)
state = _dict(payload.setdefault("mainline_execution_state", {}))
current_head = _dict(payload.get("current_head"))
production_readback_verified = bool(
state.get("current_main_cd_run_status") == "production_readback_verified"
and current_head.get("production_source_truth_available") is True
and _is_sha(str(state.get("latest_successful_deployed_source_sha") or ""))
)
deploy_marker_resolved_by_production_readback = bool(
production_readback_verified
and (deploy_marker_readback_required or cd_failed_after_registry_ready)
)
active_deployment_closure_state = (
"production_readback_verified"
if deploy_marker_resolved_by_production_readback
@@ -1132,6 +1169,110 @@ def apply_ai_loop_current_blocker_execution_queue(
)
def _record_ai_loop_current_blocker_production_resolution(
    *,
    payload: dict[str, Any],
    state: dict[str, Any],
    queue_count: int,
    blocker_id: str,
    registry_v2_ready: bool,
    registry_v2_status_classifier: str,
    deployment_closure_state: str,
    deploy_marker_readback_required: bool,
    current_cd_run_id: str,
    current_cd_run_status: str,
    current_cd_commit_sha: str,
    cd_failed_after_registry_ready: bool,
    harbor_110_repair_run_id: str,
    harbor_110_repair_run_status: str,
    harbor_110_repair_failure_classifier: str,
    harbor_110_repair_failed_after_registry_ready: bool,
    external_blocker: str,
    pressure_blocker: str,
    node_load_classifier: str,
) -> None:
    """Record a resolved AI-loop queue item without reopening active P0 state.

    The same set of ``ai_loop_current_blocker_*`` fields is written to three
    places, all mutated in place:

    * ``state``;
    * ``payload["summary"]`` (created as an empty dict when missing);
    * the ``evidence`` dict of the first entry in
      ``payload["in_progress_or_blocked_in_priority_order"]`` whose
      ``workplan_id`` is ``"P0-006"`` (nothing is written when no entry
      matches).

    The active fields report the production readback: the CD run id, run
    status and commit SHA are taken from ``state`` (``current_main_cd_run_id``,
    ``current_main_cd_run_status``, ``latest_successful_deployed_source_sha``),
    the failure/readback flags are forced to ``False``, and the control-path
    blockers and safe-next-action fields are blanked. The values that came
    from the queue item are kept under matching
    ``ai_loop_current_blocker_historical_*`` keys.

    Args:
        payload: Work-order payload; its ``summary`` and P0-006 evidence are
            updated in place.
        state: Mainline execution state; read for the production values and
            updated in place.
        queue_count: Stored as the execution queue count.
        blocker_id: Stored as the current blocker id.
        registry_v2_ready: Stored unchanged.
        registry_v2_status_classifier: Stored unchanged.
        deployment_closure_state: Stored only as a historical value.
        deploy_marker_readback_required: Stored only as a historical value.
        current_cd_run_id: Stored only as a historical value.
        current_cd_run_status: Stored only as a historical value.
        current_cd_commit_sha: Stored only as a historical value.
        cd_failed_after_registry_ready: Stored only as a historical value.
        harbor_110_repair_run_id: Stored only as a historical value.
        harbor_110_repair_run_status: Stored only as a historical value.
        harbor_110_repair_failure_classifier: Stored only as a historical
            value.
        harbor_110_repair_failed_after_registry_ready: Stored only as a
            historical value.
        external_blocker: Stored only as the historical control-path blocker.
        pressure_blocker: Stored only as the historical control-path pressure
            blocker.
        node_load_classifier: Stored unchanged.
    """
    # Snapshot the production values before ``state`` is modified below.
    production_sha = str(state.get("latest_successful_deployed_source_sha") or "")
    production_run_id = str(state.get("current_main_cd_run_id") or "")
    production_run_status = str(state.get("current_main_cd_run_status") or "")

    def _resolved_fields() -> dict[str, Any]:
        """Build a fresh field set so targets never share the mutable list value."""
        return {
            "ai_loop_current_blocker_execution_queue_count": queue_count,
            "ai_loop_current_blocker_id": blocker_id,
            "ai_loop_current_blocker_resolved_by_production_readback": True,
            "ai_loop_current_blocker_registry_v2_ready": registry_v2_ready,
            "ai_loop_current_blocker_registry_v2_status_classifier": (
                registry_v2_status_classifier
            ),
            "ai_loop_current_blocker_deployment_closure_state": (
                "production_readback_verified"
            ),
            "ai_loop_current_blocker_deploy_marker_resolved_by_production_readback": (
                True
            ),
            "ai_loop_current_blocker_deploy_marker_readback_required": False,
            "ai_loop_current_blocker_current_cd_run_id": production_run_id,
            "ai_loop_current_blocker_current_cd_run_status": production_run_status,
            "ai_loop_current_blocker_current_cd_commit_sha": production_sha,
            "ai_loop_current_blocker_cd_failed_after_registry_ready": False,
            "ai_loop_current_blocker_harbor_110_repair_failed_after_registry_ready": (
                False
            ),
            "ai_loop_current_blocker_control_path_blocker": "",
            "ai_loop_current_blocker_control_path_pressure_blocker": "",
            "ai_loop_current_blocker_safe_next_action": "",
            "ai_loop_current_blocker_safe_next_action_id": "",
            "ai_loop_current_blocker_safe_next_action_stage": "",
            "ai_loop_current_blocker_safe_next_action_command": "",
            "ai_loop_current_blocker_safe_next_action_post_verifier": "",
            "ai_loop_current_blocker_safe_next_action_requires_local_console": False,
            # New list per call: a single shared ``[]`` would alias state,
            # summary and evidence to one another.
            "ai_loop_current_blocker_safe_next_action_blocker_fields": [],
            "ai_loop_current_blocker_node_load_classifier": node_load_classifier,
            "ai_loop_current_blocker_historical_deployment_closure_state": (
                deployment_closure_state
            ),
            "ai_loop_current_blocker_historical_deploy_marker_readback_required": (
                deploy_marker_readback_required
            ),
            "ai_loop_current_blocker_historical_current_cd_run_id": (
                current_cd_run_id
            ),
            "ai_loop_current_blocker_historical_current_cd_run_status": (
                current_cd_run_status
            ),
            "ai_loop_current_blocker_historical_current_cd_commit_sha": (
                current_cd_commit_sha
            ),
            "ai_loop_current_blocker_historical_cd_failed_after_registry_ready": (
                cd_failed_after_registry_ready
            ),
            "ai_loop_current_blocker_historical_harbor_110_repair_run_id": (
                harbor_110_repair_run_id
            ),
            "ai_loop_current_blocker_historical_harbor_110_repair_run_status": (
                harbor_110_repair_run_status
            ),
            "ai_loop_current_blocker_historical_harbor_110_repair_failure_classifier": (
                harbor_110_repair_failure_classifier
            ),
            "ai_loop_current_blocker_historical_harbor_110_repair_failed_after_registry_ready": (
                harbor_110_repair_failed_after_registry_ready
            ),
            "ai_loop_current_blocker_historical_control_path_blocker": (
                external_blocker
            ),
            "ai_loop_current_blocker_historical_control_path_pressure_blocker": (
                pressure_blocker
            ),
        }

    state.update(_resolved_fields())

    # NOTE(review): ``_dict`` / ``_list`` are module helpers not visible here;
    # the writes below only persist if ``_dict`` hands back the stored dict
    # object itself rather than a copy — confirm.
    summary = _dict(payload.setdefault("summary", {}))
    summary.update(_resolved_fields())

    # Only the first P0-006 work plan receives the resolution evidence.
    for item in _list(payload.get("in_progress_or_blocked_in_priority_order")):
        workplan = _dict(item)
        if workplan.get("workplan_id") != "P0-006":
            continue
        evidence = _dict(workplan.setdefault("evidence", {}))
        evidence.update(_resolved_fields())
        break
def _ai_loop_current_blocker_can_override(
*,
status: str,

View File

@@ -841,7 +841,13 @@ def test_awoooi_priority_work_order_readback_does_not_reopen_stale_cd_failure_af
state = payload["mainline_execution_state"]
evidence = payload["in_progress_or_blocked_in_priority_order"][0]["evidence"]
blockers = state["active_p0_live_active_blockers"]
assert payload["status"] == "p0_006_blocked_reboot_auto_recovery_slo_not_ready"
assert state["active_p0_state"] == "blocked_reboot_auto_recovery_slo_not_ready"
assert state["next_executable_mainline_workplan_id"] == (
"P0-006-REBOOT-AUTO-RECOVERY-SLO-SCORECARD"
)
assert state["current_main_cd_run_status"] == "production_readback_verified"
assert state["ai_loop_current_blocker_resolved_by_production_readback"] is True
assert state["ai_loop_current_blocker_deploy_marker_readback_required"] is False
assert state["ai_loop_current_blocker_cd_failed_after_registry_ready"] is False
assert (
@@ -854,14 +860,24 @@ def test_awoooi_priority_work_order_readback_does_not_reopen_stale_cd_failure_af
assert state["ai_loop_current_blocker_current_cd_run_id"] == (
f"production_readback:{runtime_short_sha}"
)
assert state["ai_loop_current_blocker_control_path_blocker"] == ""
assert state["ai_loop_current_blocker_historical_control_path_blocker"] == (
"remote_ssh_publickey_offer_timeout"
)
assert state["ai_loop_current_blocker_historical_current_cd_run_id"] == "4258"
assert state["ai_loop_current_blocker_historical_current_cd_run_status"] == (
"Failure"
)
assert "deploy_marker_readback_required_after_registry_ready" not in blockers
assert "current_cd_failure_after_registry_ready" not in blockers
assert "remote_ssh_publickey_offer_timeout" not in blockers
assert evidence["ai_loop_current_blocker_resolved_by_production_readback"] is True
assert evidence["ai_loop_current_blocker_deploy_marker_readback_required"] is False
assert evidence["ai_loop_current_blocker_cd_failed_after_registry_ready"] is False
assert evidence["ai_loop_current_blocker_control_path_blocker"] == ""
assert evidence["ai_loop_current_blocker_historical_control_path_blocker"] == (
"remote_ssh_publickey_offer_timeout"
)
assert evidence[
"ai_loop_current_blocker_historical_deploy_marker_readback_required"
] is True
@@ -874,6 +890,10 @@ def test_awoooi_priority_work_order_readback_does_not_reopen_stale_cd_failure_af
assert payload["summary"]["ai_loop_current_blocker_current_cd_run_status"] == (
"production_readback_verified"
)
assert payload["summary"]["ai_loop_current_blocker_control_path_blocker"] == ""
assert payload["summary"][
"ai_loop_current_blocker_historical_control_path_blocker"
] == "remote_ssh_publickey_offer_timeout"
assert all(
"P0-006-CD-DEPLOY-MARKER-READBACK" not in item
for item in payload["next_execution_order"]