feat(metrics): expose AI automation health gauges
Some checks failed
CD Pipeline / deploy (push) Has been cancelled
Some checks failed
CD Pipeline / deploy (push) Has been cancelled
This commit is contained in:
@@ -94,6 +94,7 @@
|
||||
- 2026-07-02 起 PChome safe mapping lane expansion 必須先從 direct mapping candidate decision lane 開始;`/api/ai/pchome-growth/mapping-backlog/direct-mapping-candidate-decision-lane-closeout-package` 會把 candidate decision package 收斂成 lane receipt、receipt replay、drift verifier 與 product readiness,輸出 `primary_human_gate_count=0`、`drift_count`、`next_machine_action` 與 hash evidence。此 endpoint 預設不執行搜尋、不開 DB、不寫 DB、不持久化候選,只在 `execute_search=1` 時走 controlled read-only candidate search。
|
||||
- 2026-07-02 起 AI automation scheduled health summary 必須提供 machine-readable endpoint;`/api/ai-automation/scheduled-health-summary` 會只讀 smoke history,並可選擇 `include_current_smoke=1` 執行不寫 history 的 current smoke,收斂 AI smoke、PChome drift monitor、history freshness、daily summary delivery readiness 四個 family,輸出 `primary_human_gate_count=0`、`writes_database_count=0`、`next_machine_actions` 與 scheduled output endpoints。此 endpoint 不寄 Telegram、不寫 DB、不改排程,只提供排程/監控可消費的健康摘要。
|
||||
- 2026-07-02 起 PChome controlled apply rollback evidence 必須提供聚合 endpoint;`/api/ai/pchome-growth/mapping-backlog/direct-mapping-retry-candidate-exception-controlled-apply-rollback-evidence-package` 會聚合 receipt replay、drift verifier、drift recovery、compact readback、artifact retention 五類 evidence,輸出 rollback required / ready actions / protected chain / next machine action。此 endpoint 不執行 rollback、不執行 re-apply、不執行 SQL、不寫 DB;0 drift 時必須輸出 no-op evidence,drift detected 時才輸出 check-mode reapply action。
|
||||
- 2026-07-02 起 `/metrics` 必須匯出 AI automation scheduled health summary gauges:`momo_ai_automation_scheduled_health_summary_total`、`momo_ai_automation_scheduled_health_family_status`、`momo_ai_automation_scheduled_health_primary_human_gate_count`、`momo_ai_automation_scheduled_health_writes_database_count`。Prometheus scrape 不得寄 Telegram、不寫 DB、不執行 current smoke,只讀 scheduled health summary history。
|
||||
- V10.644 起 `/ai_intelligence` 的商品明細列不得只用句子描述比價;每列必須顯示 PChome 價格、MOMO 參考價、差距、可信度四格價格證據,並保留下一步按鈕。單位價候選需顯示單位價與單位,候選待確認或缺資料則以「待補 / 候選待確認」呈現,不得捏造價格。
|
||||
- V10.645 起 `/ai_intelligence` 的商品明細分流切換後,必須顯示「這類商品怎麼處理」的行動摘要,包含件數、近 7 天業績、平均可信度、最大價差、代表商品與主按鈕;使用者不得只能看到商品列表而不知道下一步。
|
||||
- V10.646 起 `/ai_intelligence` 的商品明細必須提供搜尋與排序;搜尋至少涵蓋商品、分類、商品編號與 MOMO 候選資訊,排序至少支援優先級、近 7 天業績、價差、下滑幅度與可信度。搜尋/排序後的行動摘要與明細列表必須使用同一批結果。
|
||||
|
||||
@@ -101,7 +101,7 @@
|
||||
|
||||
## P1 - Product Visibility And Professional Website Experience
|
||||
|
||||
狀態: 進行中。
|
||||
狀態: 已完成。
|
||||
|
||||
目的: 讓 AI 自動化在產品裡可見,成為專業營運工作流,而不是只藏在後端。
|
||||
|
||||
@@ -223,6 +223,7 @@
|
||||
| P3.1 | Extend receipt / replay / drift pattern to more lanes | 已完成 | direct mapping candidate decision lane closeout route + focused tests | P3.2 scheduled automation health summaries |
|
||||
| P3.2 | Scheduled automation health summaries | 已完成 | `/api/ai-automation/scheduled-health-summary` + smoke service focused tests | P3.3 rollback evidence packages |
|
||||
| P3.3 | Rollback evidence packages | 已完成 | controlled apply rollback evidence route + focused tests | P3.4 observability metrics integration |
|
||||
| P3.4 | Observability metrics integration | 已完成 | `/metrics` exports scheduled health summary gauges + focused tests | P4 source / deployment governance ongoing |
|
||||
|
||||
## 後續回報格式
|
||||
|
||||
|
||||
@@ -342,6 +342,20 @@ def prometheus_metrics():
|
||||
except Exception as e:
|
||||
sys_log.warning(f"[Metrics] 無法取得 AI 自動化指標: {e}")
|
||||
|
||||
try:
|
||||
from services.ai_automation_smoke_service import build_scheduled_automation_health_summary
|
||||
|
||||
_register_ai_automation_health_summary_metrics(
|
||||
registry,
|
||||
Gauge,
|
||||
build_scheduled_automation_health_summary(
|
||||
history_limit=50,
|
||||
include_current_smoke=False,
|
||||
),
|
||||
)
|
||||
except Exception as e:
|
||||
sys_log.warning(f"[Metrics] 無法取得 AI 自動化健康摘要指標: {e}")
|
||||
|
||||
return Response(generate_latest(registry), mimetype=CONTENT_TYPE_LATEST)
|
||||
|
||||
except ImportError:
|
||||
@@ -472,6 +486,50 @@ def _register_ai_automation_metrics(registry, gauge_cls, metrics_snapshot):
|
||||
gauge.labels(**{name: label_values.get(name, "unknown") for name in label_names}).set(values.get(suffix, 0))
|
||||
|
||||
|
||||
def _register_ai_automation_health_summary_metrics(registry, gauge_cls, health_summary):
|
||||
"""Export scheduled AI automation health summary into Prometheus gauges."""
|
||||
summary = health_summary.get("summary") or {}
|
||||
status_counts = {
|
||||
"ok": int(summary.get("ok") or 0),
|
||||
"warning": int(summary.get("warning") or 0),
|
||||
"critical": int(summary.get("critical") or 0),
|
||||
"total": int(summary.get("total") or 0),
|
||||
}
|
||||
status_gauge = gauge_cls(
|
||||
"momo_ai_automation_scheduled_health_summary_total",
|
||||
"AI automation scheduled health family counts",
|
||||
["status"],
|
||||
registry=registry,
|
||||
)
|
||||
for status, value in status_counts.items():
|
||||
status_gauge.labels(status=status).set(value)
|
||||
|
||||
family_status = gauge_cls(
|
||||
"momo_ai_automation_scheduled_health_family_status",
|
||||
"AI automation scheduled health family status. Value is 1 for the current status label.",
|
||||
["family", "status"],
|
||||
registry=registry,
|
||||
)
|
||||
for family in health_summary.get("families", []) or []:
|
||||
family_key = str(family.get("key") or "unknown")[:80]
|
||||
status = str(family.get("status") or "unknown")[:40]
|
||||
family_status.labels(family=family_key, status=status).set(1)
|
||||
|
||||
human_gate_gauge = gauge_cls(
|
||||
"momo_ai_automation_scheduled_health_primary_human_gate_count",
|
||||
"AI automation scheduled health primary human gate count",
|
||||
registry=registry,
|
||||
)
|
||||
human_gate_gauge.set(int(summary.get("primary_human_gate_count") or 0))
|
||||
|
||||
write_gauge = gauge_cls(
|
||||
"momo_ai_automation_scheduled_health_writes_database_count",
|
||||
"AI automation scheduled health database write count",
|
||||
registry=registry,
|
||||
)
|
||||
write_gauge.set(int(summary.get("writes_database_count") or 0))
|
||||
|
||||
|
||||
@system_public_bp.route('/settings')
|
||||
def settings():
|
||||
"""分類設定頁面"""
|
||||
|
||||
@@ -97,6 +97,45 @@ def test_system_metrics_exports_ai_automation_zero_baseline():
|
||||
)
|
||||
|
||||
|
||||
def test_system_metrics_exports_scheduled_health_summary():
    """The scheduled health summary is rendered as gauges in the Prometheus text output.

    Feeds a fixed summary (3 ok, 1 warning, 0 critical, 4 total, no human
    gates, no database writes) plus two families through the registration
    helper, then checks that the exposition text contains the expected sample
    lines.
    """
    from prometheus_client import CollectorRegistry, Gauge, generate_latest
    from routes.system_public_routes import _register_ai_automation_health_summary_metrics

    health_summary = {
        "summary": {
            "ok": 3,
            "warning": 1,
            "critical": 0,
            "total": 4,
            "primary_human_gate_count": 0,
            "writes_database_count": 0,
        },
        "families": [
            {"key": "ai_automation_smoke", "status": "ok"},
            {"key": "pchome_controlled_apply_drift_monitor", "status": "warning"},
        ],
    }

    # Use a private registry so nothing leaks into the process-wide default one.
    scrape_registry = CollectorRegistry()
    _register_ai_automation_health_summary_metrics(scrape_registry, Gauge, health_summary)
    rendered = generate_latest(scrape_registry).decode("utf-8")

    # Each entry is one full sample line expected in the exposition text.
    expected_samples = (
        'momo_ai_automation_scheduled_health_summary_total{status="ok"} 3.0',
        'momo_ai_automation_scheduled_health_summary_total{status="warning"} 1.0',
        'momo_ai_automation_scheduled_health_family_status{family="ai_automation_smoke",status="ok"} 1.0',
        'momo_ai_automation_scheduled_health_family_status{family="pchome_controlled_apply_drift_monitor",status="warning"} 1.0',
        "momo_ai_automation_scheduled_health_primary_human_gate_count 0.0",
        "momo_ai_automation_scheduled_health_writes_database_count 0.0",
    )
    for sample in expected_samples:
        assert sample in rendered
|
||||
|
||||
|
||||
def test_system_metrics_counts_sales_records_with_raw_count_query():
|
||||
from prometheus_client import CollectorRegistry, Gauge, generate_latest
|
||||
from routes.system_public_routes import _set_database_record_counts
|
||||
|
||||
Reference in New Issue
Block a user