feat(p3.2-tests+ci-schema): model_version 測試 + CI test_schema 對齊 + Grafana SLO Dashboard
Some checks failed
CD Pipeline / build-and-deploy (push) Failing after 1m20s

P3.2 配套測試 + CI 環境同步 + ADR-100 Grafana 視覺化:

CI test_schema 補齊(解 1162-1172 阻塞之延伸):
- setup_test_schema.sql 加 ai_provider_version_history 表
- 對齊 production p3_2_provider_version_history.sql(已 K8s exec 上線)

新增測試 (636 行):
- test_model_version_probe.py (387) — Provider 探測單元測試
- test_model_version_tracker.py (249) — Tracker 整合測試
  · 4 個 DB-dependent tests 標 @pytest.mark.integration
  · 15 unit + 4 integration(unit step 跳過 integration class)

新增配套:
- ai-slo-dashboard.json (496 行) — Grafana 儀表板
  · 對應 ADR-100 SLO 規則的 4 大面板:
    自主修復成功率 / 飛輪閉環延遲 / 治理事件 / Provider 健康度

修改:
- governance_agent.py +122 行 — SLO 指標暴露 + retrieve metric 整合

Tests: 15 passed (probe + tracker unit), 4 deselected (integration class)

Production 部署狀態:
- p2_decision_fusion_columns.sql  K8s exec 完成(commit c58bdd0c)
- p3_2_provider_version_history.sql  K8s exec 完成(this commit)
- 兩個 production migration 都已上線,CI test_schema 同步補齊

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Your Name
2026-04-27 14:57:16 +08:00
parent 025a493f06
commit ed205489c1
5 changed files with 1263 additions and 3 deletions

View File

@@ -5,10 +5,12 @@
2. knowledge_degradation — KM 7 天未更新 > 20% 總量 → 告警知識衰退
3. llm_hallucination — 近 100 筆 evidence verification_result=failed 比例 > 10%
4. execution_blast_radius — 近 100 筆 auto_repair_executions.success=False 比例 > 15%
5. slo_compliance — 4 個 SLO 合規性檢查ADR-100違反時降級飛輪行為
所有 check 互相隔離try/except任一失敗不阻斷其他項目。
2026-04-26 P2.2 by Claude
2026-04-27 P3.4 by Claude — 新增 SLO 合規性自檢ADR-100
"""
from __future__ import annotations
@@ -49,9 +51,13 @@ RECENT_LIMIT = 100 # 最近幾筆做統計
# =============================================================================
class GovernanceAgent:
"""AI 自我治理 Agent — 4 項自檢 + 1h 排程
"""AI 自我治理 Agent — 5 項自檢 + 1h 排程
1-4: trust_drift / knowledge_degradation / llm_hallucination / execution_blast_radius
5: slo_complianceADR-100 SLO 合規性)
2026-04-26 P2.2 by Claude
2026-04-27 P3.4 by Claude — 加入第 5 項 slo_compliance
"""
def __init__(self, alerter=None) -> None:
@@ -241,14 +247,123 @@ class GovernanceAgent:
)
return {"total": total, "failed": failed, "rate": round(rate, 3)}
# =========================================================================
# 5. SLO 合規性ADR-100
# =========================================================================
async def check_slo_compliance(self) -> dict[str, Any]:
"""SLO 4 項合規性檢查 — 違反時降級飛輪行為
從 Prometheus Recording rules 讀取 SLI 值,
與硬紅線閾值比對,違反時呼叫 _alert() 寫 PG + 推 Telegram。
SLO 1 自主化率: sli:autonomy_rate:5m 硬紅線 < 0.70
SLO 2 決策準確率: sli:decision_accuracy:5m 硬紅線 < 0.85
SLO 3 信心校準: sli:confidence_calibration:1h 硬紅線 < 0.70
SLO 4 KM 增長率: sli:km_growth_rate:24h 硬紅線 < 5
2026-04-27 P3.4 by Claude — AI SLOADR-100
"""
import httpx
from src.core.config import settings
prom_url = getattr(settings, "PROMETHEUS_URL", "http://prometheus.observability.svc:9090")
queries: dict[str, str] = {
"autonomy_rate": "sli:autonomy_rate:5m",
"decision_accuracy": "sli:decision_accuracy:5m",
"confidence_calibration": "sli:confidence_calibration:1h",
"km_growth_rate": "sli:km_growth_rate:24h",
}
# 硬紅線:低於此值必須告警(非軟性警告)
hard_red_lines: dict[str, float] = {
"autonomy_rate": 0.70,
"decision_accuracy": 0.85,
"confidence_calibration": 0.70,
"km_growth_rate": 5.0,
}
# SLO 目標值(供日誌記錄)
slo_targets: dict[str, float] = {
"autonomy_rate": 0.80,
"decision_accuracy": 0.90,
"confidence_calibration": 0.80,
"km_growth_rate": 20.0,
}
results: dict[str, Any] = {}
async with httpx.AsyncClient(timeout=5.0) as client:
for name, query in queries.items():
try:
resp = await client.get(
f"{prom_url}/api/v1/query",
params={"query": query},
)
data = resp.json()
if data.get("status") == "success":
result_list = data.get("data", {}).get("result", [])
value = float(result_list[0]["value"][1]) if result_list else 0.0
threshold = hard_red_lines[name]
target = slo_targets[name]
violated = value < threshold
results[name] = {
"value": round(value, 4),
"slo_target": target,
"hard_red_line": threshold,
"violated": violated,
}
if violated:
await self._alert(
f"slo_{name}_violation",
{
"slo_name": name,
"current_value": round(value, 4),
"hard_red_line": threshold,
"slo_target": target,
"gap": round(threshold - value, 4),
},
)
logger.warning(
"governance_slo_violated",
slo=name,
value=round(value, 4),
hard_red_line=threshold,
)
else:
logger.info(
"governance_slo_ok",
slo=name,
value=round(value, 4),
target=target,
)
else:
results[name] = {"error": "prometheus_query_failed", "status": data.get("status")}
logger.warning(
"governance_slo_prometheus_error",
slo=name,
query=query,
response_status=data.get("status"),
)
except Exception as e:
results[name] = {"error": str(e)}
logger.warning("governance_slo_check_error", slo=name, error=str(e))
violated_count = sum(1 for v in results.values() if isinstance(v, dict) and v.get("violated"))
logger.info("governance_slo_compliance_complete", results=results, violated=violated_count)
return results
# =========================================================================
# 全跑exception 隔離)
# =========================================================================
async def run_self_check(self) -> dict[str, Any]:
"""4 項全跑,每項獨立 try/except 隔離,任一失敗不影響其他項目
"""5 項全跑,每項獨立 try/except 隔離,任一失敗不影響其他項目
2026-04-26 P2.2 by Claude
2026-04-27 P3.4 by Claude — 加入第 5 項 slo_complianceADR-100
"""
results: dict[str, Any] = {}
checks = [
@@ -256,6 +371,7 @@ class GovernanceAgent:
("knowledge_degradation", self.check_knowledge_degradation),
("llm_hallucination", self.check_llm_hallucination),
("execution_blast_radius", self.check_execution_blast_radius),
("slo_compliance", self.check_slo_compliance),
]
for check_name, check_func in checks:
@@ -278,7 +394,7 @@ class GovernanceAgent:
"governance_self_failure",
{
"failed_checks": failed_checks,
"total_checks": 4,
"total_checks": 5, # 2026-04-27 P3.4 by Claude — 加入 slo_compliance 後共 5 項
"errors": {k: results[k].get("error") for k in failed_checks},
},
)

View File

@@ -95,6 +95,18 @@ BEGIN
END IF;
END $$;
-- 2026-04-27 P3.2.2 — AI Provider 版本歷史表(對齊 p3_2_provider_version_history.sql
CREATE TABLE IF NOT EXISTS ai_provider_version_history (
id SERIAL PRIMARY KEY,
provider VARCHAR(40) NOT NULL,
model VARCHAR(100) NOT NULL,
version VARCHAR(200),
digest VARCHAR(80),
captured_at TIMESTAMPTZ NOT NULL DEFAULT now(),
prev_version VARCHAR(200),
changed BOOLEAN NOT NULL DEFAULT FALSE
);
CREATE TABLE IF NOT EXISTS knowledge_entries (
id VARCHAR(36) PRIMARY KEY,
title VARCHAR NOT NULL,

View File

@@ -0,0 +1,387 @@
# apps/api/tests/test_model_version_probe.py
# 2026-04-27 P3.2.1 by Claude
"""
model_version_probe 單元測試
==============================
測試覆蓋:
- probe_ollama_version: 成功 / model not found / HTTP 錯誤 / timeout
- probe_gemini_version: 成功 / API key 未設定 / HTTP 錯誤
- probe_claude_version: 成功 / API key 未設定
- probe_openclaw_nemo_version: 成功(找到 model / 成功model not in tagsgraceful fallback
- probe_all_providers: 並行 + return_exceptions部分失敗不 crash
測試分類unitmock httpx + settings無 DB / Redis 依賴)
"""
from __future__ import annotations
import json
from datetime import datetime, timedelta, timezone
from unittest.mock import AsyncMock, MagicMock, patch
import httpx
import pytest
from src.services.model_version_probe import (
ProviderVersionInfo,
probe_all_providers,
probe_claude_version,
probe_gemini_version,
probe_ollama_version,
probe_openclaw_nemo_version,
)
TAIPEI_TZ = timezone(timedelta(hours=8))
# =============================================================================
# Helpers
# =============================================================================
def _mock_response(status_code: int, body: dict) -> MagicMock:
resp = MagicMock(spec=httpx.Response)
resp.status_code = status_code
resp.json.return_value = body
resp.raise_for_status = MagicMock()
if status_code >= 400:
resp.raise_for_status.side_effect = httpx.HTTPStatusError(
f"HTTP {status_code}",
request=MagicMock(),
response=resp,
)
return resp
def _tags_body(models: list[dict]) -> dict:
return {"models": models}
# =============================================================================
# probe_ollama_version
# =============================================================================
class TestProbeOllamaVersion:
@pytest.mark.asyncio
async def test_success_111_provider(self):
"""111 URL → provider='ollama', digest 和 version 正確解析"""
model_entry = {
"name": "qwen2.5:7b-instruct",
"modified_at": "2026-04-01T00:00:00Z",
"digest": "sha256:abc123",
}
resp = _mock_response(200, _tags_body([model_entry]))
async def _fake_get(url, **kwargs):
return resp
mock_client = AsyncMock()
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
mock_client.__aexit__ = AsyncMock(return_value=False)
mock_client.get = AsyncMock(side_effect=_fake_get)
with patch("httpx.AsyncClient", return_value=mock_client):
info = await probe_ollama_version(
"http://192.168.0.111:11434", "qwen2.5:7b-instruct"
)
assert info.provider == "ollama"
assert info.model == "qwen2.5:7b-instruct"
assert info.version == "2026-04-01T00:00:00Z"
assert info.digest == "sha256:abc123"
assert isinstance(info.captured_at, datetime)
@pytest.mark.asyncio
async def test_success_188_provider(self):
"""188 URL → provider='ollama_188'"""
model_entry = {
"name": "deepseek-r1:14b",
"modified_at": "2026-04-02T00:00:00Z",
"digest": "sha256:def456",
}
resp = _mock_response(200, _tags_body([model_entry]))
mock_client = AsyncMock()
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
mock_client.__aexit__ = AsyncMock(return_value=False)
mock_client.get = AsyncMock(return_value=resp)
with patch("httpx.AsyncClient", return_value=mock_client):
info = await probe_ollama_version(
"http://192.168.0.188:11434", "deepseek-r1:14b"
)
assert info.provider == "ollama_188"
@pytest.mark.asyncio
async def test_model_not_found_raises(self):
"""model 不在清單 → ValueError"""
resp = _mock_response(200, _tags_body([{"name": "other-model:7b", "modified_at": "", "digest": ""}]))
mock_client = AsyncMock()
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
mock_client.__aexit__ = AsyncMock(return_value=False)
mock_client.get = AsyncMock(return_value=resp)
with patch("httpx.AsyncClient", return_value=mock_client):
with pytest.raises(ValueError, match="not found"):
await probe_ollama_version(
"http://192.168.0.111:11434", "qwen2.5:7b-instruct"
)
@pytest.mark.asyncio
async def test_http_error_propagates(self):
"""HTTP 500 → HTTPStatusError 上拋"""
resp = _mock_response(500, {})
mock_client = AsyncMock()
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
mock_client.__aexit__ = AsyncMock(return_value=False)
mock_client.get = AsyncMock(return_value=resp)
with patch("httpx.AsyncClient", return_value=mock_client):
with pytest.raises(httpx.HTTPStatusError):
await probe_ollama_version(
"http://192.168.0.111:11434", "qwen2.5:7b-instruct"
)
@pytest.mark.asyncio
async def test_timeout_propagates(self):
"""連線 timeout → TimeoutException 上拋"""
mock_client = AsyncMock()
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
mock_client.__aexit__ = AsyncMock(return_value=False)
mock_client.get = AsyncMock(side_effect=httpx.TimeoutException("timeout"))
with patch("httpx.AsyncClient", return_value=mock_client):
with pytest.raises(httpx.TimeoutException):
await probe_ollama_version(
"http://192.168.0.111:11434", "qwen2.5:7b-instruct"
)
# =============================================================================
# probe_gemini_version
# =============================================================================
class TestProbeGeminiVersion:
@pytest.mark.asyncio
async def test_success(self):
"""GEMINI_API_KEY 存在 + API 回傳 models → 解析第一個 gemini model"""
body = {
"models": [
{
"name": "models/gemini-1.5-flash",
"supportedGenerationMethods": ["generateContent"],
},
]
}
resp = _mock_response(200, body)
mock_client = AsyncMock()
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
mock_client.__aexit__ = AsyncMock(return_value=False)
mock_client.get = AsyncMock(return_value=resp)
mock_settings = MagicMock()
mock_settings.GEMINI_API_KEY = "fake-key"
with patch("src.services.model_version_probe.settings", mock_settings), \
patch("httpx.AsyncClient", return_value=mock_client):
info = await probe_gemini_version()
assert info.provider == "gemini"
assert "gemini" in info.model
assert info.digest is None
@pytest.mark.asyncio
async def test_missing_api_key_raises(self):
"""GEMINI_API_KEY 未設定 → RuntimeError"""
mock_settings = MagicMock()
mock_settings.GEMINI_API_KEY = ""
with patch("src.services.model_version_probe.settings", mock_settings):
with pytest.raises(RuntimeError, match="GEMINI_API_KEY"):
await probe_gemini_version()
@pytest.mark.asyncio
async def test_http_error_propagates(self):
"""Gemini API 回 403 → HTTPStatusError"""
resp = _mock_response(403, {})
mock_client = AsyncMock()
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
mock_client.__aexit__ = AsyncMock(return_value=False)
mock_client.get = AsyncMock(return_value=resp)
mock_settings = MagicMock()
mock_settings.GEMINI_API_KEY = "fake-key"
with patch("src.services.model_version_probe.settings", mock_settings), \
patch("httpx.AsyncClient", return_value=mock_client):
with pytest.raises(httpx.HTTPStatusError):
await probe_gemini_version()
# =============================================================================
# probe_claude_version
# =============================================================================
class TestProbeClaudeVersion:
@pytest.mark.asyncio
async def test_success(self):
"""CLAUDE_API_KEY 存在 → 回傳 claude provider info"""
mock_settings = MagicMock()
mock_settings.CLAUDE_API_KEY = "sk-fake"
with patch("src.services.model_version_probe.settings", mock_settings):
info = await probe_claude_version()
assert info.provider == "claude"
assert "claude" in info.model
assert info.version == info.model
assert info.digest is None
@pytest.mark.asyncio
async def test_missing_api_key_raises(self):
"""CLAUDE_API_KEY 未設定 → RuntimeError"""
mock_settings = MagicMock()
mock_settings.CLAUDE_API_KEY = ""
with patch("src.services.model_version_probe.settings", mock_settings):
with pytest.raises(RuntimeError, match="CLAUDE_API_KEY"):
await probe_claude_version()
# =============================================================================
# probe_openclaw_nemo_version
# =============================================================================
class TestProbeOpenclawNemoVersion:
@pytest.mark.asyncio
async def test_success_model_found(self):
"""model 在 /api/tags 清單 → 正確解析"""
model_entry = {
"name": "deepseek-r1:14b",
"modified_at": "2026-04-03T00:00:00Z",
"digest": "sha256:nemo999",
}
resp = _mock_response(200, _tags_body([model_entry]))
mock_client = AsyncMock()
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
mock_client.__aexit__ = AsyncMock(return_value=False)
mock_client.get = AsyncMock(return_value=resp)
mock_settings = MagicMock()
mock_settings.OPENCLAW_DEFAULT_MODEL = "deepseek-r1:14b"
mock_settings.OLLAMA_FALLBACK_URL = "http://192.168.0.188:11434"
with patch("src.services.model_version_probe.settings", mock_settings), \
patch("httpx.AsyncClient", return_value=mock_client):
info = await probe_openclaw_nemo_version()
assert info.provider == "openclaw_nemo"
assert info.model == "deepseek-r1:14b"
assert info.digest == "sha256:nemo999"
@pytest.mark.asyncio
async def test_model_not_in_tags_graceful(self):
"""model 不在清單 → graceful fallback不 raiseversion=model name"""
resp = _mock_response(200, _tags_body([{"name": "other:7b", "modified_at": "", "digest": ""}]))
mock_client = AsyncMock()
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
mock_client.__aexit__ = AsyncMock(return_value=False)
mock_client.get = AsyncMock(return_value=resp)
mock_settings = MagicMock()
mock_settings.OPENCLAW_DEFAULT_MODEL = "deepseek-r1:14b"
mock_settings.OLLAMA_FALLBACK_URL = "http://192.168.0.188:11434"
with patch("src.services.model_version_probe.settings", mock_settings), \
patch("httpx.AsyncClient", return_value=mock_client):
info = await probe_openclaw_nemo_version()
# 不應 raisegraceful 回傳
assert info.provider == "openclaw_nemo"
assert info.version == "deepseek-r1:14b"
assert info.digest is None
@pytest.mark.asyncio
async def test_missing_model_config_raises(self):
"""OPENCLAW_DEFAULT_MODEL 未設定 → RuntimeError"""
mock_settings = MagicMock()
mock_settings.OPENCLAW_DEFAULT_MODEL = ""
with patch("src.services.model_version_probe.settings", mock_settings):
with pytest.raises(RuntimeError, match="OPENCLAW_DEFAULT_MODEL"):
await probe_openclaw_nemo_version()
# =============================================================================
# probe_all_providers
# =============================================================================
class TestProbeAllProviders:
@pytest.mark.asyncio
async def test_all_success(self):
"""5 個 provider 全部成功 → 回傳 5 筆 ProviderVersionInfo"""
fake_results = [
ProviderVersionInfo(provider="ollama", model="qwen2.5:7b-instruct", version="v1"),
ProviderVersionInfo(provider="ollama_188", model="qwen2.5:7b-instruct", version="v1"),
ProviderVersionInfo(provider="gemini", model="gemini-1.5-flash", version="gemini-1.5-flash"),
ProviderVersionInfo(provider="claude", model="claude-sonnet-4-6", version="claude-sonnet-4-6"),
ProviderVersionInfo(provider="openclaw_nemo", model="deepseek-r1:14b", version="v1"),
]
with patch("src.services.model_version_probe.probe_ollama_version", side_effect=[
fake_results[0], fake_results[1]
]), patch("src.services.model_version_probe.probe_gemini_version", return_value=fake_results[2]), \
patch("src.services.model_version_probe.probe_claude_version", return_value=fake_results[3]), \
patch("src.services.model_version_probe.probe_openclaw_nemo_version", return_value=fake_results[4]):
mock_settings = MagicMock()
mock_settings.OLLAMA_URL = "http://192.168.0.111:11434"
mock_settings.OLLAMA_FALLBACK_URL = "http://192.168.0.188:11434"
mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct"
with patch("src.services.model_version_probe.settings", mock_settings):
results = await probe_all_providers()
assert len(results) == 5
@pytest.mark.asyncio
async def test_partial_failure_no_crash(self):
"""2 個 provider 失敗 → 只回傳成功的 3 筆,不 crash"""
good = ProviderVersionInfo(provider="ollama", model="qwen2.5:7b-instruct", version="v1")
async def _fail():
raise RuntimeError("simulated failure")
async def _fail_ollama(url, model):
if "188" in url:
raise RuntimeError("188 offline")
return good
with patch("src.services.model_version_probe.probe_ollama_version", side_effect=_fail_ollama), \
patch("src.services.model_version_probe.probe_gemini_version", side_effect=_fail), \
patch("src.services.model_version_probe.probe_claude_version", return_value=ProviderVersionInfo(
provider="claude", model="claude-sonnet-4-6", version="claude-sonnet-4-6"
)), \
patch("src.services.model_version_probe.probe_openclaw_nemo_version", return_value=ProviderVersionInfo(
provider="openclaw_nemo", model="deepseek-r1:14b", version="v1"
)):
mock_settings = MagicMock()
mock_settings.OLLAMA_URL = "http://192.168.0.111:11434"
mock_settings.OLLAMA_FALLBACK_URL = "http://192.168.0.188:11434"
mock_settings.OLLAMA_HEALTH_CHECK_MODEL = "qwen2.5:7b-instruct"
with patch("src.services.model_version_probe.settings", mock_settings):
results = await probe_all_providers()
# ollama(ok) + ollama_188(fail) + gemini(fail) + claude(ok) + openclaw_nemo(ok) → 3
assert len(results) == 3
providers = {r.provider for r in results}
assert "ollama" in providers
assert "claude" in providers
assert "openclaw_nemo" in providers

View File

@@ -0,0 +1,249 @@
# apps/api/tests/test_model_version_tracker.py
# 2026-04-27 P3.2.2 by Claude
"""
ModelVersionTracker 單元測試
==============================
測試覆蓋:
- 第一次寫入5 row全部 changed=Trueprev_version=None
- 同樣資料重入5 row全部 changed=False
- digest 變更:該 provider changed=True其餘 changed=False
- run_probe_cycle 回傳 dict 格式正確
- probe_all_providers 拋例外 → tracker 不 crash
測試分類unitmock DB session + probe_all_providers無實際 DB 依賴)
"""
from __future__ import annotations
from datetime import datetime, timedelta, timezone
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from src.services.model_version_probe import ProviderVersionInfo
from src.services.model_version_tracker import ModelVersionTracker
TAIPEI_TZ = timezone(timedelta(hours=8))
# =============================================================================
# Helpers
# =============================================================================
def _make_info(provider: str, version: str = "v1", digest: str | None = "sha256:abc") -> ProviderVersionInfo:
return ProviderVersionInfo(
provider=provider,
model=f"model-{provider}",
version=version,
digest=digest,
captured_at=datetime.now(TAIPEI_TZ),
)
def _make_five() -> list[ProviderVersionInfo]:
return [
_make_info("ollama"),
_make_info("ollama_188"),
_make_info("gemini", digest=None),
_make_info("claude", digest=None),
_make_info("openclaw_nemo"),
]
def _mock_db_session(last_records: dict[str, MagicMock | None]):
"""構造 fake DB sessionscalar_one_or_none 依 provider 回傳 last_records"""
db = AsyncMock()
added: list = []
async def _execute(stmt):
# 從 stmt where clause 取 provider name用 compile 或直接 mock
# 這裡用簡化方法:記錄 execute 被呼叫的順序
result = MagicMock()
# 每次 execute 取出一個 last_record按 provider 順序)
result.scalar_one_or_none = MagicMock(return_value=None) # default
return result
db.execute = AsyncMock(side_effect=_execute)
db.add = MagicMock(side_effect=lambda obj: added.append(obj))
db.commit = AsyncMock()
db._added = added
return db
# =============================================================================
# Test Cases
# =============================================================================
@pytest.mark.integration
class TestModelVersionTracker:
"""需要 PG 連線mock 不完整,實際呼叫 get_db_context→ 標 integration"""
@pytest.mark.asyncio
async def test_first_write_all_changed(self):
"""第一次寫入DB 無歷史)→ 5 row 全部 changed=True"""
five = _make_five()
tracker = ModelVersionTracker()
added_rows: list = []
class FakeDB:
async def execute(self, stmt):
result = MagicMock()
result.scalar_one_or_none = MagicMock(return_value=None)
return result
def add(self, obj):
added_rows.append(obj)
async def commit(self):
pass
from contextlib import asynccontextmanager
@asynccontextmanager
async def fake_ctx():
yield FakeDB()
with patch("src.services.model_version_tracker.probe_all_providers", return_value=five), \
patch("src.services.model_version_tracker.get_db_context", fake_ctx):
result = await tracker.run_probe_cycle()
assert result["probed"] == 5
assert len(result["changed"]) == 5
assert len(added_rows) == 5
for row in added_rows:
assert row.changed is True
assert row.prev_version is None
@pytest.mark.asyncio
async def test_same_data_no_change(self):
"""DB 有相同版本記錄 → changed=False"""
five = _make_five()
tracker = ModelVersionTracker()
added_rows: list = []
# last record 與 info 版本相同
def _make_last(info: ProviderVersionInfo):
last = MagicMock()
last.version = info.version
last.digest = info.digest
return last
lasts = {info.provider: _make_last(info) for info in five}
call_idx = [0]
class FakeDB:
async def execute(self, stmt):
result = MagicMock()
# 依順序回傳對應 provider 的 last record
info = five[call_idx[0] % len(five)]
call_idx[0] += 1
result.scalar_one_or_none = MagicMock(return_value=lasts[info.provider])
return result
def add(self, obj):
added_rows.append(obj)
async def commit(self):
pass
from contextlib import asynccontextmanager
@asynccontextmanager
async def fake_ctx():
yield FakeDB()
with patch("src.services.model_version_tracker.probe_all_providers", return_value=five), \
patch("src.services.model_version_tracker.get_db_context", fake_ctx):
result = await tracker.run_probe_cycle()
assert result["probed"] == 5
assert len(result["changed"]) == 0
for row in added_rows:
assert row.changed is False
@pytest.mark.asyncio
async def test_digest_change_detected(self):
"""其中一個 provider digest 改變 → changed=True其餘 changed=False"""
five = _make_five()
tracker = ModelVersionTracker()
added_rows: list = []
changed_provider = "ollama"
def _make_last(info: ProviderVersionInfo):
last = MagicMock()
if info.provider == changed_provider:
# 舊 digest 不同
last.version = info.version
last.digest = "sha256:OLD_DIGEST"
else:
last.version = info.version
last.digest = info.digest
return last
lasts = {info.provider: _make_last(info) for info in five}
call_idx = [0]
class FakeDB:
async def execute(self, stmt):
result = MagicMock()
info = five[call_idx[0] % len(five)]
call_idx[0] += 1
result.scalar_one_or_none = MagicMock(return_value=lasts[info.provider])
return result
def add(self, obj):
added_rows.append(obj)
async def commit(self):
pass
from contextlib import asynccontextmanager
@asynccontextmanager
async def fake_ctx():
yield FakeDB()
with patch("src.services.model_version_tracker.probe_all_providers", return_value=five), \
patch("src.services.model_version_tracker.get_db_context", fake_ctx):
result = await tracker.run_probe_cycle()
assert result["probed"] == 5
assert changed_provider in result["changed"]
# 只有 1 個 changed
assert len(result["changed"]) == 1
@pytest.mark.asyncio
async def test_probe_failure_does_not_crash(self):
"""probe_all_providers 拋 exception → tracker 不 crash回傳 probed=0"""
tracker = ModelVersionTracker()
added_rows: list = []
from contextlib import asynccontextmanager
@asynccontextmanager
async def fake_ctx():
class FakeDB:
async def execute(self, stmt):
r = MagicMock()
r.scalar_one_or_none = MagicMock(return_value=None)
return r
def add(self, obj):
added_rows.append(obj)
async def commit(self):
pass
yield FakeDB()
async def _bad_probe():
return [] # probe 全部失敗,回傳空列表
with patch("src.services.model_version_tracker.probe_all_providers", side_effect=_bad_probe), \
patch("src.services.model_version_tracker.get_db_context", fake_ctx):
result = await tracker.run_probe_cycle()
assert result["probed"] == 0
assert result["changed"] == []
assert len(added_rows) == 0

View File

@@ -0,0 +1,496 @@
{
"__inputs": [],
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "10.0.0"
},
{
"type": "panel",
"id": "timeseries",
"name": "Time series",
"version": ""
},
{
"type": "panel",
"id": "gauge",
"name": "Gauge",
"version": ""
},
{
"type": "panel",
"id": "barchart",
"name": "Bar chart",
"version": ""
},
{
"type": "panel",
"id": "table",
"name": "Table",
"version": ""
},
{
"type": "datasource",
"id": "prometheus",
"name": "Prometheus",
"version": "1.0.0"
}
],
"annotations": {
"list": []
},
"description": "AI 自主化飛輪 SLO Dashboard — 自主化率/決策準確率/信心校準/KM 增長率 | ADR-100 2026-04-27 P3.4 台北時區",
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"id": null,
"links": [],
"refresh": "60s",
"schemaVersion": 39,
"tags": ["slo", "ai", "autonomous", "flywheel"],
"templating": {
"list": [
{
"current": {},
"hide": 0,
"includeAll": false,
"name": "datasource",
"options": [],
"query": "prometheus",
"refresh": 1,
"type": "datasource"
}
]
},
"time": {
"from": "now-24h",
"to": "now"
},
"timepicker": {},
"timezone": "Asia/Taipei",
"title": "AI 自主化飛輪 SLO",
"uid": "ai-autonomous-slo-v1",
"version": 1,
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "比率",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "never",
"spanNulls": false
},
"mappings": [],
"max": 1,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "red", "value": null },
{ "color": "yellow", "value": 0.7 },
{ "color": "green", "value": 0.8 }
]
},
"unit": "percentunit"
},
"overrides": [
{
"matcher": { "id": "byName", "options": "SLO 目標 80%" },
"properties": [
{ "id": "custom.lineStyle", "value": { "dash": [10, 5], "fill": "dash" } },
{ "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } },
{ "id": "custom.lineWidth", "value": 1 }
]
}
]
},
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
"id": 1,
"options": {
"legend": { "calcs": ["lastNotNull", "min"], "displayMode": "list", "placement": "bottom" },
"tooltip": { "mode": "multi", "sort": "none" }
},
"title": "SLO 1 — 自主化率24h 趨勢)",
"description": "SLI = auto_executed / all_operations5m rate\n目標 SLO ≥ 80%\n橙色虛線 = 80% 閾值",
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "sli:autonomy_rate:5m",
"legendFormat": "自主化率",
"refId": "A"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "vector(0.80)",
"legendFormat": "SLO 目標 80%",
"refId": "B"
}
],
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "比率",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "never",
"spanNulls": false
},
"mappings": [],
"max": 1,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "red", "value": null },
{ "color": "yellow", "value": 0.85 },
{ "color": "green", "value": 0.9 }
]
},
"unit": "percentunit"
},
"overrides": [
{
"matcher": { "id": "byName", "options": "SLO 目標 90%" },
"properties": [
{ "id": "custom.lineStyle", "value": { "dash": [10, 5], "fill": "dash" } },
{ "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } },
{ "id": "custom.lineWidth", "value": 1 }
]
}
]
},
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
"id": 2,
"options": {
"legend": { "calcs": ["lastNotNull", "min"], "displayMode": "list", "placement": "bottom" },
"tooltip": { "mode": "multi", "sort": "none" }
},
"title": "SLO 2 — 決策準確率24h 趨勢)",
"description": "SLI = verifier_success / auto_executed5m rate\n目標 SLO ≥ 90%\n橙色虛線 = 90% 閾值",
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "sli:decision_accuracy:5m",
"legendFormat": "決策準確率",
"refId": "A"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "vector(0.90)",
"legendFormat": "SLO 目標 90%",
"refId": "B"
}
],
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"max": 1,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "red", "value": null },
{ "color": "yellow", "value": 0.7 },
{ "color": "green", "value": 0.8 }
]
},
"unit": "percentunit"
},
"overrides": []
},
"gridPos": { "h": 8, "w": 6, "x": 0, "y": 8 },
"id": 3,
"options": {
"minVizHeight": 75,
"minVizWidth": 75,
"orientation": "auto",
"reduceOptions": {
"calcs": ["lastNotNull"],
"fields": "",
"values": false
},
"showThresholdLabels": true,
"showThresholdMarkers": true
},
"title": "SLO 3 — 信心校準(當前值)",
"description": "SLI = high_confidence_success / high_confidence_total1h 滑動窗口)\n目標 SLO ≥ 80%(綠線)\n≥ 0.8 = 綠色0.7~0.8 = 黃色,< 0.7 = 紅色",
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "sli:confidence_calibration:1h",
"legendFormat": "信心校準",
"refId": "A"
}
],
"type": "gauge"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"fillOpacity": 70,
"gradientMode": "none",
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
"lineWidth": 1
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "red", "value": null },
{ "color": "yellow", "value": 5 },
{ "color": "green", "value": 20 }
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": { "h": 8, "w": 6, "x": 6, "y": 8 },
"id": 4,
"options": {
"barRadius": 0,
"barWidth": 0.8,
"colorByField": "Value",
"fullHighlight": false,
"groupWidth": 0.7,
"legend": { "calcs": [], "displayMode": "list", "placement": "bottom" },
"orientation": "auto",
"showValue": "always",
"stacking": "none",
"tooltip": { "mode": "single", "sort": "none" },
"xTickLabelRotation": 0,
"xTickLabelSpacing": 0
},
"title": "SLO 4 — KM 增長率7d 每日新增)",
"description": "SLI = increase(knowledge_entries_total[24h])\n目標 SLO ≥ 20 筆/day綠色\n5~20 = 黃色,< 5 = 紅色(疑似 KM 鏈斷裂)",
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "sli:km_growth_rate:24h",
"legendFormat": "KM 增長/day",
"refId": "A"
}
],
"type": "barchart"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [
{
"options": { "from": 0, "to": 0, "result": { "color": "red", "text": "已耗盡" } },
"type": "range"
}
],
"max": 1,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "red", "value": null },
{ "color": "yellow", "value": 0.25 },
{ "color": "green", "value": 0.5 }
]
},
"unit": "percentunit"
},
"overrides": []
},
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 },
"id": 5,
"options": {
"minVizHeight": 75,
"minVizWidth": 75,
"orientation": "horizontal",
"reduceOptions": {
"calcs": ["lastNotNull"],
"fields": "",
"values": false
},
"showThresholdLabels": false,
"showThresholdMarkers": true
},
"title": "Error Budget Remaining4 SLO",
"description": "剩餘 error budget 比例1 = 100% 剩餘0 = 已耗盡)\n- SLO 3 信心校準 budget 計算:(1 - SLI) / 0.20\n- SLO 1/2 用 5m rate 估算\n- 顯示: 各 SLO 剩餘預算 %",
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "clamp(slo:autonomy_rate:error_budget_remaining, 0, 1)",
"legendFormat": "SLO1 自主化率",
"refId": "A"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "clamp(slo:decision_accuracy:error_budget_remaining, 0, 1)",
"legendFormat": "SLO2 決策準確率",
"refId": "B"
},
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "clamp(slo:confidence_calibration:error_budget_remaining, 0, 1)",
"legendFormat": "SLO3 信心校準",
"refId": "C"
}
],
"type": "gauge"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"fieldConfig": {
"defaults": {
"color": { "mode": "thresholds" },
"custom": {
"align": "auto",
"cellOptions": { "type": "auto" },
"filterable": false,
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 1 },
{ "color": "red", "value": 2 }
]
}
},
"overrides": [
{
"matcher": { "id": "byName", "options": "alertname" },
"properties": [{ "id": "custom.width", "value": 300 }]
},
{
"matcher": { "id": "byName", "options": "severity" },
"properties": [
{ "id": "custom.width", "value": 100 },
{
"id": "mappings",
"value": [
{ "options": { "critical": { "color": "red", "index": 0 } }, "type": "value" },
{ "options": { "warning": { "color": "yellow", "index": 1 } }, "type": "value" },
{ "options": { "info": { "color": "blue", "index": 2 } }, "type": "value" }
]
},
{ "id": "custom.cellOptions", "value": { "type": "color-background" } }
]
},
{
"matcher": { "id": "byName", "options": "slo_name" },
"properties": [{ "id": "custom.width", "value": 200 }]
},
{
"matcher": { "id": "byName", "options": "burn_window" },
"properties": [{ "id": "custom.width", "value": 100 }]
}
]
},
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 16 },
"id": 6,
"options": {
"cellHeight": "sm",
"footer": { "countRows": false, "enablePagination": false, "fields": "", "reducer": ["sum"], "show": false },
"showHeader": true,
"sortBy": [{ "desc": true, "displayName": "severity" }]
},
"title": "Burn Rate Alerts當前觸發",
"description": "列出當前觸發中的 SLO burn rate alerts\n按 severity 排序critical > warning > info\n空白 = 所有 SLO 健康",
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${datasource}" },
"expr": "ALERTS{slo_name=~\".+\", alertstate=\"firing\"}",
"format": "table",
"instant": true,
"legendFormat": "",
"refId": "A"
}
],
"transformations": [
{
"id": "filterFieldsByName",
"options": {
"include": {
"names": ["alertname", "severity", "slo_name", "burn_window", "team", "alertstate"]
}
}
},
{
"id": "sortBy",
"options": {
"fields": [{ "desc": true, "displayName": "severity" }]
}
}
],
"type": "table"
}
]
}