fix(reboot): reconcile green runtime metric blockers
Some checks failed
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Successful in 1m5s
CD Pipeline / post-deploy-checks (push) Has been cancelled
CD Pipeline / build-and-deploy (push) Has been cancelled

This commit is contained in:
Your Name
2026-07-03 02:55:53 +08:00
parent 8c46ffd49c
commit db425d3b8a
2 changed files with 51 additions and 3 deletions

View File

@@ -62,6 +62,12 @@ _PUBLIC_MAINTENANCE_BLOCKERS = {
"public_route_raw_5xx_without_maintenance_fallback",
"public_route_unreachable_without_external_l1_fallback",
}
# Maps each "<flag>_not_1" metric blocker to the boolean readiness flag it is
# paired with. Resulting blocker names (kept here so they stay greppable):
#   backup_core_green_not_1, host_188_service_green_not_1,
#   product_data_green_not_1, service_green_not_1
_RUNTIME_BOOLEAN_READY_BLOCKERS = {
    f"{ready_key}_not_1": ready_key
    for ready_key in (
        "backup_core_green",
        "host_188_service_green",
        "product_data_green",
        "service_green",
    )
}
_PROMETHEUS_SOURCE_CONTROLLED_BLOCKERS = {
"conversation_event_hot_path_index_migration_source_missing": (
"conversation_event_hot_path_index_migration_source_present"
@@ -572,6 +578,7 @@ def _reconcile_prometheus_metric_active_blockers_with_runtime_readbacks(
payload: dict[str, Any],
active_blockers: list[str],
) -> list[str]:
runtime_ready_blockers: set[str] = set()
public_maintenance = _dict(payload.get("public_maintenance_fallback"))
public_maintenance_ready = (
public_maintenance.get("runtime_readback_present") is True
@@ -582,9 +589,19 @@ def _reconcile_prometheus_metric_active_blockers_with_runtime_readbacks(
)
== 0
)
runtime_ready_blockers = (
_PUBLIC_MAINTENANCE_BLOCKERS if public_maintenance_ready else set()
)
if public_maintenance_ready:
runtime_ready_blockers.update(_PUBLIC_MAINTENANCE_BLOCKERS)
service_backup = _dict(payload.get("controlled_service_data_backup_readback"))
rollups = _dict(payload.get("rollups"))
for blocker, ready_key in _RUNTIME_BOOLEAN_READY_BLOCKERS.items():
if (
payload.get(ready_key) is True
or service_backup.get(ready_key) is True
or rollups.get(ready_key) is True
):
runtime_ready_blockers.add(blocker)
if not runtime_ready_blockers:
return _unique_strings(active_blockers)

View File

@@ -190,6 +190,37 @@ def test_reboot_auto_recovery_slo_scorecard_does_not_reopen_ready_public_mainten
] == 1
def test_reboot_auto_recovery_slo_scorecard_reconciles_green_runtime_readback_metric_blockers():
    """Check "*_not_1" blockers are dropped only for flags the payload reports True.

    The metric readback is seeded with all four "*_green_not_1" blockers on top
    of PROMETHEUS_RUNTIME_BLOCKERS; the assertions below expect the two whose
    payload flag is True to be removed and reported as reconciled, while the
    two whose flag is False stay active.
    """
    injected_blockers = [
        "backup_core_green_not_1",
        "host_188_service_green_not_1",
        "product_data_green_not_1",
        "service_green_not_1",
        *PROMETHEUS_RUNTIME_BLOCKERS,
    ]
    metric_readback = dict(PROMETHEUS_RUNTIME_READBACK)
    metric_readback.update(
        active_blockers=injected_blockers,
        active_blocker_count=len(injected_blockers),
    )

    payload = load_latest_reboot_auto_recovery_slo_scorecard(
        prometheus_metric_readback=metric_readback,
    )

    # Readiness flags as reported by the scorecard payload.
    for green_flag in ("product_data_green", "host_188_service_green"):
        assert payload[green_flag] is True
    for not_green_flag in ("service_green", "backup_core_green"):
        assert payload[not_green_flag] is False

    # Blockers paired with a True flag are gone; the others remain.
    remaining_blockers = payload["active_blockers"]
    for reconciled in ("product_data_green_not_1", "host_188_service_green_not_1"):
        assert reconciled not in remaining_blockers
    for still_active in ("service_green_not_1", "backup_core_green_not_1"):
        assert still_active in remaining_blockers

    # The reconciliation is also surfaced in the readback and rollup sections.
    reconciled_names = payload["readback"][
        "runtime_metric_runtime_readback_reconciled_blockers"
    ]
    assert reconciled_names == [
        "host_188_service_green_not_1",
        "product_data_green_not_1",
    ]
    reconciled_total = payload["rollups"][
        "runtime_metric_runtime_readback_reconciled_blocker_count"
    ]
    assert reconciled_total == 2
def test_reboot_auto_recovery_slo_scorecard_overlays_runtime_scorecard_artifact(
tmp_path,
):