feat(flywheel): Phase 2-3/2-5 — auto_repair outcome 寫入 + 134 筆 alertname 回填腳本
Some checks failed
CD Pipeline / build-and-deploy (push) Has been cancelled
Some checks failed
CD Pipeline / build-and-deploy (push) Has been cancelled
ADR-073 Phase 2-3: _try_auto_repair_background() 修復執行後寫入 Incident.outcome
- effectiveness_score: 5(成功) / 2(失敗)
- human_feedback: auto_repair:<playbook_id>:success 或 auto_repair:<playbook_id>:failed:<error>
- should_remember: True(成功) → KMConversionService 飛輪入口
- 讓 KMConversionService 可依 outcome 判斷 EXECUTION_SUCCESS

ADR-073 Phase 2-5: scripts/backfill_alertname.py
- UPDATE incidents SET alertname = COALESCE(signals->0->>'alertname', signals->0->>'alert_name')
- 已在 Pod 執行:134 筆 NULL → 0 筆 (2026-04-12 ogt)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -207,6 +207,25 @@ async def _try_auto_repair_background(
|
||||
},
|
||||
)
|
||||
|
||||
# ADR-073 Phase 2-3: 自動修復結果 → 寫入 Incident outcome (2026-04-12 ogt)
|
||||
# 讓 KMConversionService 可依 outcome 判斷是否為 EXECUTION_SUCCESS
|
||||
if result:
|
||||
_effectiveness = 5 if result.success else 2
|
||||
_feedback = (
|
||||
f"auto_repair:{result.playbook_id}:success"
|
||||
if result.success
|
||||
else f"auto_repair:{result.playbook_id}:failed:{result.error}"
|
||||
)
|
||||
try:
|
||||
await incident_service.update_outcome(
|
||||
incident_id=incident_id,
|
||||
effectiveness_score=_effectiveness,
|
||||
human_feedback=_feedback,
|
||||
should_remember=result.success,
|
||||
)
|
||||
except Exception as _outcome_err:
|
||||
logger.warning("auto_repair_outcome_write_failed", error=str(_outcome_err))
|
||||
|
||||
# 2026-04-10 Claude Sonnet 4.6 Asia/Taipei: 自動修復後更新 Telegram 卡片
|
||||
# 透過 TelegramGateway Service 層移除按鈕並回覆結果 (積木化鐵律)
|
||||
if result:
|
||||
|
||||
55
scripts/backfill_alertname.py
Normal file
55
scripts/backfill_alertname.py
Normal file
@@ -0,0 +1,55 @@
|
||||
"""
|
||||
ADR-073 Phase 2-5: 回填 incidents.alertname 欄位
|
||||
2026-04-12 ogt: 舊資料 signals JSONB 用 alert_name key (非 alertname alias)
|
||||
新資料 (Phase 2-1 修復後) 已有 alertname alias
|
||||
|
||||
執行方式:
|
||||
kubectl exec -n awoooi-prod <api-pod> -- python3 scripts/backfill_alertname.py
|
||||
"""
|
||||
import asyncio
|
||||
import sys
|
||||
sys.path.insert(0, "/app")
|
||||
|
||||
from sqlalchemy import text
|
||||
|
||||
from src.db.base import get_db_context
|
||||
|
||||
|
||||
async def main() -> None:
    """Backfill ``incidents.alertname`` from the first element of ``signals``.

    Prints the number of rows with a NULL ``alertname`` before the run, the
    number of rows the UPDATE touched, and the number still NULL afterwards.
    The UPDATE only targets rows whose ``alertname`` is NULL and whose first
    signal carries an ``alertname`` or ``alert_name`` key.
    """
    async with get_db_context() as session:
        # The same counting statement is used before and after the backfill.
        null_count_stmt = text(
            "SELECT COUNT(*) FROM incidents WHERE alertname IS NULL"
        )

        # How many rows need backfilling before we start.
        pending = (await session.execute(null_count_stmt)).scalar()
        print(f"待回填: {pending} 筆")

        # Prefer signals->0->>'alertname' (the newer key); fall back to
        # signals->0->>'alert_name' (the older key).
        backfill_stmt = text("""
                UPDATE incidents
                SET alertname = COALESCE(
                    signals->0->>'alertname',
                    signals->0->>'alert_name'
                )
                WHERE alertname IS NULL
                  AND (
                    signals->0->>'alertname' IS NOT NULL
                    OR signals->0->>'alert_name' IS NOT NULL
                  )
            """)
        update_result = await session.execute(backfill_stmt)
        await session.commit()
        print(f"已回填: {update_result.rowcount} 筆")

        # Rows that remain NULL could not be backfilled from signals.
        leftover = (await session.execute(null_count_stmt)).scalar()
        print(f"仍為 NULL: {leftover} 筆 (signals 無 alert_name 欄位,無法回填)")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run the async backfill to completion.
    asyncio.run(main())
|
||||
Reference in New Issue
Block a user