fix(reboot): preserve service backup metric blockers
Some checks failed
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Successful in 1m6s
CD Pipeline / build-and-deploy (push) Successful in 5m6s
AWOOOI Harbor 110 Local Repair / workflow-shape (push) Successful in 0s
AWOOOI Harbor 110 Local Repair / harbor-110-local-repair (push) Successful in 23s
CD Pipeline / post-deploy-checks (push) Has been cancelled
Some checks failed
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Successful in 1m6s
CD Pipeline / build-and-deploy (push) Successful in 5m6s
AWOOOI Harbor 110 Local Repair / workflow-shape (push) Successful in 0s
AWOOOI Harbor 110 Local Repair / harbor-110-local-repair (push) Successful in 23s
CD Pipeline / post-deploy-checks (push) Has been cancelled
This commit is contained in:
@@ -68,6 +68,14 @@ _RUNTIME_BOOLEAN_READY_BLOCKERS = {
|
||||
"product_data_green_not_1": "product_data_green",
|
||||
"service_green_not_1": "service_green",
|
||||
}
|
||||
_SERVICE_DATA_BACKUP_BLOCKING_FIELD_BLOCKERS = {
|
||||
"backup_core_green": "backup_core_green_not_1",
|
||||
"host_188_service_green": "host_188_service_green_not_1",
|
||||
"post_start_blocked": "post_start_blocked_not_zero",
|
||||
"product_data_green": "product_data_green_not_1",
|
||||
"service_green": "service_green_not_1",
|
||||
"wazuh_dashboard_degraded": "wazuh_dashboard_degraded",
|
||||
}
|
||||
_PROMETHEUS_SOURCE_CONTROLLED_BLOCKERS = {
|
||||
"conversation_event_hot_path_index_migration_source_missing": (
|
||||
"conversation_event_hot_path_index_migration_source_present"
|
||||
@@ -421,6 +429,8 @@ def _annotate_prometheus_metric_readback(
|
||||
payload["runtime_metric_source_control_reconciled_blocker_count"] = 0
|
||||
payload["runtime_metric_runtime_readback_reconciled_blockers"] = []
|
||||
payload["runtime_metric_runtime_readback_reconciled_blocker_count"] = 0
|
||||
payload["runtime_metric_runtime_readback_added_blockers"] = []
|
||||
payload["runtime_metric_runtime_readback_added_blocker_count"] = 0
|
||||
|
||||
readback = _dict(payload.setdefault("readback", {}))
|
||||
readback["runtime_metric_readback_present"] = present
|
||||
@@ -435,6 +445,8 @@ def _annotate_prometheus_metric_readback(
|
||||
readback["runtime_metric_source_control_reconciled_blocker_count"] = 0
|
||||
readback["runtime_metric_runtime_readback_reconciled_blockers"] = []
|
||||
readback["runtime_metric_runtime_readback_reconciled_blocker_count"] = 0
|
||||
readback["runtime_metric_runtime_readback_added_blockers"] = []
|
||||
readback["runtime_metric_runtime_readback_added_blocker_count"] = 0
|
||||
|
||||
rollups = _dict(payload.setdefault("rollups", {}))
|
||||
rollups["runtime_metric_readback_present"] = present
|
||||
@@ -446,6 +458,7 @@ def _annotate_prometheus_metric_readback(
|
||||
]
|
||||
rollups["runtime_metric_source_control_reconciled_blocker_count"] = 0
|
||||
rollups["runtime_metric_runtime_readback_reconciled_blocker_count"] = 0
|
||||
rollups["runtime_metric_runtime_readback_added_blocker_count"] = 0
|
||||
_apply_prometheus_windows99_vmware_readback(payload, metric_readback)
|
||||
|
||||
|
||||
@@ -634,6 +647,44 @@ def _reconcile_prometheus_metric_active_blockers_with_runtime_readbacks(
|
||||
return _unique_strings(reconciled)
|
||||
|
||||
|
||||
def _add_runtime_readback_active_blockers_missing_from_metric(
    payload: dict[str, Any],
    active_blockers: list[str],
) -> list[str]:
    """Append service/data backup readback blockers to the metric blocker list.

    The ``blocking_fields`` entry of
    ``payload["controlled_service_data_backup_readback"]`` is translated
    through ``_SERVICE_DATA_BACKUP_BLOCKING_FIELD_BLOCKERS``; fields without a
    mapping are ignored. The mapped blockers are placed after
    ``active_blockers`` and the combined list is passed through
    ``_unique_strings``.

    When at least one mapped blocker was not already in ``active_blockers``,
    the newly added blockers and their count are recorded on ``payload`` and
    in its ``readback`` section, and the count alone in ``rollups``.

    Args:
        payload: Scorecard payload; read for the backup readback and updated
            in place with the "added blockers" bookkeeping.
        active_blockers: Blockers already derived from the metric readback.

    Returns:
        The merged blocker list, or ``_unique_strings(active_blockers)`` when
        no blocking field maps to a blocker.
    """
    added_list_key = "runtime_metric_runtime_readback_added_blockers"
    added_count_key = "runtime_metric_runtime_readback_added_blocker_count"

    backup_readback = _dict(payload.get("controlled_service_data_backup_readback"))
    mapped_candidates = []
    for field_name in _strings(backup_readback.get("blocking_fields")):
        if field_name in _SERVICE_DATA_BACKUP_BLOCKING_FIELD_BLOCKERS:
            mapped_candidates.append(
                _SERVICE_DATA_BACKUP_BLOCKING_FIELD_BLOCKERS[field_name]
            )
    mapped_blockers = _unique_strings(mapped_candidates)

    # Nothing to contribute: hand back the metric blockers unchanged in content.
    if not mapped_blockers:
        return _unique_strings(active_blockers)

    combined = _unique_strings([*active_blockers, *mapped_blockers])

    # Only blockers the metric readback did not already report count as added.
    newly_added = [
        candidate
        for candidate in mapped_blockers
        if candidate not in active_blockers
    ]
    if not newly_added:
        return combined

    newly_added_total = len(newly_added)

    payload[added_list_key] = newly_added
    payload[added_count_key] = newly_added_total

    readback_section = _dict(payload.setdefault("readback", {}))
    readback_section[added_list_key] = newly_added
    readback_section[added_count_key] = newly_added_total

    # Rollups carry only the count, not the blocker names.
    rollups_section = _dict(payload.setdefault("rollups", {}))
    rollups_section[added_count_key] = newly_added_total

    return combined
|
||||
|
||||
|
||||
def _apply_prometheus_metric_active_blockers(
|
||||
payload: dict[str, Any],
|
||||
metric_readback: dict[str, Any],
|
||||
@@ -651,6 +702,10 @@ def _apply_prometheus_metric_active_blockers(
|
||||
payload,
|
||||
active_blockers,
|
||||
)
|
||||
active_blockers = _add_runtime_readback_active_blockers_missing_from_metric(
|
||||
payload,
|
||||
active_blockers,
|
||||
)
|
||||
|
||||
can_claim_slo = metric_readback.get("ready") is True and not active_blockers
|
||||
primary_blocker = str(
|
||||
|
||||
@@ -331,6 +331,16 @@ def test_reboot_auto_recovery_slo_scorecard_overlays_prometheus_runtime_metrics(
|
||||
payload = load_latest_reboot_auto_recovery_slo_scorecard(
|
||||
prometheus_metric_readback=PROMETHEUS_RUNTIME_READBACK
|
||||
)
|
||||
expected_runtime_readback_added_blockers = [
|
||||
"service_green_not_1",
|
||||
"post_start_blocked_not_zero",
|
||||
"backup_core_green_not_1",
|
||||
"wazuh_dashboard_degraded",
|
||||
]
|
||||
expected_active_blockers = [
|
||||
*PROMETHEUS_RUNTIME_SOURCE_RECONCILED_BLOCKERS,
|
||||
*expected_runtime_readback_added_blockers,
|
||||
]
|
||||
|
||||
assert payload["runtime_scorecard_readback_present"] is False
|
||||
assert payload["runtime_metric_readback_present"] is True
|
||||
@@ -341,17 +351,21 @@ def test_reboot_auto_recovery_slo_scorecard_overlays_prometheus_runtime_metrics(
|
||||
== [PROMETHEUS_SOURCE_RECONCILED_BLOCKER]
|
||||
)
|
||||
assert payload["runtime_metric_source_control_reconciled_blocker_count"] == 1
|
||||
assert payload["active_blockers"] == PROMETHEUS_RUNTIME_SOURCE_RECONCILED_BLOCKERS
|
||||
assert payload["active_blocker_count"] == 7
|
||||
assert payload["active_blockers"] == expected_active_blockers
|
||||
assert payload["active_blocker_count"] == 11
|
||||
assert payload["readiness_percent"] == 47
|
||||
assert payload["primary_blocker"] == "reboot_event_required_host_unreachable"
|
||||
assert payload["next_safe_action"] == (
|
||||
"rerun_reboot_event_detector_and_host_probe_verify_only_no_reboot"
|
||||
)
|
||||
assert "backup_core_green_not_1" not in payload["active_blockers"]
|
||||
assert "service_green_not_1" not in payload["active_blockers"]
|
||||
assert "backup_core_green_not_1" in payload["active_blockers"]
|
||||
assert "service_green_not_1" in payload["active_blockers"]
|
||||
assert PROMETHEUS_SOURCE_RECONCILED_BLOCKER not in payload["active_blockers"]
|
||||
assert payload["active_blocker_action_matrix"]["item_count"] == 7
|
||||
assert payload["runtime_metric_runtime_readback_added_blockers"] == (
|
||||
expected_runtime_readback_added_blockers
|
||||
)
|
||||
assert payload["runtime_metric_runtime_readback_added_blocker_count"] == 4
|
||||
assert payload["active_blocker_action_matrix"]["item_count"] == 11
|
||||
assert payload["windows99_vmware_autostart"]["readback_present"] is True
|
||||
assert payload["windows99_vmware_autostart"]["missing_vmx_aliases"] == ["111"]
|
||||
assert payload["windows99_vmware_autostart"]["powered_off_aliases"] == [
|
||||
@@ -401,16 +415,34 @@ def test_reboot_auto_recovery_slo_scorecard_overlays_prometheus_runtime_metrics(
|
||||
"host_cpu_pressure",
|
||||
0,
|
||||
) == 0
|
||||
assert payload["readback"]["active_blocker_count"] == 7
|
||||
assert payload["active_blocker_action_matrix"]["category_counts"][
|
||||
"post_reboot_service_readiness"
|
||||
] == 2
|
||||
assert payload["active_blocker_action_matrix"]["category_counts"][
|
||||
"backup_observability"
|
||||
] == 1
|
||||
assert payload["active_blocker_action_matrix"]["category_counts"][
|
||||
"security_observability"
|
||||
] == 1
|
||||
assert payload["readback"]["active_blocker_count"] == 11
|
||||
assert payload["readback"]["runtime_metric_readback_present"] is True
|
||||
assert payload["readback"][
|
||||
"runtime_metric_source_control_reconciled_blocker_count"
|
||||
] == 1
|
||||
assert payload["rollups"]["active_blocker_count"] == 7
|
||||
assert payload["readback"][
|
||||
"runtime_metric_runtime_readback_added_blockers"
|
||||
] == expected_runtime_readback_added_blockers
|
||||
assert payload["readback"][
|
||||
"runtime_metric_runtime_readback_added_blocker_count"
|
||||
] == 4
|
||||
assert payload["rollups"]["active_blocker_count"] == 11
|
||||
assert payload["rollups"]["runtime_metric_readback_present"] is True
|
||||
assert payload["rollups"][
|
||||
"runtime_metric_source_control_reconciled_blocker_count"
|
||||
] == 1
|
||||
assert payload["rollups"][
|
||||
"runtime_metric_runtime_readback_added_blocker_count"
|
||||
] == 4
|
||||
assert payload["rollups"]["primary_blocker_owner_lane"] == (
|
||||
"reboot_event_detector_and_host_probe"
|
||||
)
|
||||
@@ -433,8 +465,14 @@ def test_reboot_auto_recovery_slo_scorecard_keeps_prometheus_source_missing_when
|
||||
prometheus_metric_readback=PROMETHEUS_RUNTIME_READBACK,
|
||||
)
|
||||
|
||||
assert payload["active_blockers"] == PROMETHEUS_RUNTIME_BLOCKERS
|
||||
assert payload["active_blocker_count"] == 8
|
||||
assert payload["active_blockers"] == [
|
||||
*PROMETHEUS_RUNTIME_BLOCKERS,
|
||||
"service_green_not_1",
|
||||
"post_start_blocked_not_zero",
|
||||
"backup_core_green_not_1",
|
||||
"wazuh_dashboard_degraded",
|
||||
]
|
||||
assert payload["active_blocker_count"] == 12
|
||||
assert payload["runtime_metric_source_control_reconciled_blocker_count"] == 0
|
||||
action_by_blocker = {
|
||||
item["blocker"]: item
|
||||
|
||||
Reference in New Issue
Block a user