fix(classify): backup/heartbeat severity=warning/critical 告警恢復告警卡片格式
Some checks failed
CD Pipeline / build-and-deploy (push) Failing after 2m38s
Some checks failed
CD Pipeline / build-and-deploy (push) Failing after 2m38s
根因:classify_alert_early() 的 backup 規則無 severity 條件,導致 VeleroBackupFailed / HostBackupFailed (warning/critical) 被分為 TYPE-1 (純資訊無按鈕),告警卡片格式遺失。

修復:
- backup/heartbeat 關鍵字只在 severity=info/none 才命中 TYPE-1
- severity=warning/critical 的 backup 告警走正確 prefix 規則 (Velero→kubernetes TYPE-3, HostBackup→infrastructure TYPE-3)
- Watchdog (severity=none) 由 severity 規則先命中,維持 TYPE-1
- 補強測試:25 cases,含 VeleroBackupFailed critical → kubernetes TYPE-3

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -126,7 +126,12 @@ def classify_alert_early(alertname: str, severity: str, _labels: dict) -> tuple[
|
||||
return "config_drift", "TYPE-4D"
|
||||
if severity in ("info", "none"):
|
||||
return "info", "TYPE-1"
|
||||
if any(kw in alertname_lower for kw in ("backup", "heartbeat")):
|
||||
# backup/heartbeat 關鍵字只有 severity=info/none 才是純資訊
|
||||
# severity=warning/critical(例如 VeleroBackupFailed, HostBackupFailed)→ 繼續走 prefix 規則
|
||||
if severity in ("info", "none") and any(kw in alertname_lower for kw in ("backup", "heartbeat")):
|
||||
return "backup", "TYPE-1"
|
||||
# Watchdog/Heartbeat 永遠是 TYPE-1(Alertmanager 心跳)
|
||||
if "watchdog" in alertname_lower or alertname in ("Heartbeat",):
|
||||
return "backup", "TYPE-1"
|
||||
if alertname.startswith(("Docker", "Host")):
|
||||
return "infrastructure", "TYPE-3"
|
||||
|
||||
@@ -47,20 +47,25 @@ class TestInfoAlerts:
|
||||
assert nt == "TYPE-1"
|
||||
assert ac == "info"
|
||||
|
||||
def test_backup_keyword(self):
|
||||
def test_backup_keyword_info_only(self):
|
||||
# severity=info → severity 規則先命中,TYPE-1
|
||||
ac, nt = classify_alert_early("BackupJobComplete", "info", {})
|
||||
assert nt == "TYPE-1"
|
||||
|
||||
def test_backup_keyword_warning_not_type1(self):
|
||||
# BackupJobFailed severity=warning → 繼續走 prefix 規則,不應是 TYPE-1
|
||||
ac, nt = classify_alert_early("BackupJobFailed", "warning", {})
|
||||
assert nt == "TYPE-1"
|
||||
assert ac == "backup"
|
||||
assert nt == "TYPE-3"
|
||||
|
||||
def test_heartbeat_keyword(self):
|
||||
ac, nt = classify_alert_early("WatchdogHeartbeat", "warning", {})
|
||||
def test_watchdog_heartbeat(self):
|
||||
# Watchdog (Alertmanager 心跳) severity=none → severity 規則先命中,TYPE-1
|
||||
ac, nt = classify_alert_early("Watchdog", "none", {})
|
||||
assert nt == "TYPE-1"
|
||||
assert ac == "backup"
|
||||
|
||||
def test_backup_case_insensitive(self):
|
||||
def test_backup_critical_not_type1(self):
|
||||
# critical backup 告警應走各自 prefix,不是純資訊
|
||||
ac, nt = classify_alert_early("BACKUP_MISSING", "critical", {})
|
||||
assert nt == "TYPE-1"
|
||||
assert ac == "backup"
|
||||
assert nt == "TYPE-3"
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
@@ -100,11 +105,16 @@ class TestKubernetes:
|
||||
assert nt == "TYPE-3"
|
||||
assert ac == "kubernetes"
|
||||
|
||||
def test_velero_backup_keyword_wins(self):
|
||||
# VeleroBackupFailed 含 "backup" → backup 規則優先於 kubernetes prefix
|
||||
def test_velero_backup_failed_is_kubernetes(self):
|
||||
# VeleroBackupFailed severity=critical → backup 規則不命中,走 Velero prefix → kubernetes TYPE-3
|
||||
ac, nt = classify_alert_early("VeleroBackupFailed", "critical", {})
|
||||
assert nt == "TYPE-3"
|
||||
assert ac == "kubernetes"
|
||||
|
||||
def test_velero_backup_success_info_is_type1(self):
|
||||
# VeleroBackupSuccess severity=info → TYPE-1
|
||||
ac, nt = classify_alert_early("VeleroBackupSuccess", "info", {})
|
||||
assert nt == "TYPE-1"
|
||||
assert ac == "backup"
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
|
||||
Reference in New Issue
Block a user