feat(telegram): SRE 戰情室群組三頭政治 Triumvirate (ADR-053)
All checks were successful
CD Pipeline / build-and-deploy (push) Successful in 7m6s
All checks were successful
CD Pipeline / build-and-deploy (push) Successful in 7m6s
- config.py: 新增 OPENCLAW_BOT_TOKEN / NEMOTRON_BOT_TOKEN / SRE_GROUP_CHAT_ID
- telegram_gateway.py: send_to_group / send_as_openclaw / send_as_nemotron / trigger_group_ai_discussion / _send_approval_card_to_group
- send_approval_card 告警發送後非同步觸發群組 AI 雙向討論
- configmap: SRE_GROUP_CHAT_ID=-1003711974679
- secrets: OPENCLAW_BOT_TOKEN / NEMOTRON_BOT_TOKEN CHANGE_ME 佔位

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -345,6 +345,19 @@ class Settings(BaseSettings):
|
||||
default=False,
|
||||
description="Telegram Polling (False: OpenClaw handles it; True: only if OpenClaw unavailable)",
|
||||
)
|
||||
# 2026-04-03 ogt: SRE 戰情室群組三頭政治 (Triumvirate) — ADR-053
|
||||
OPENCLAW_BOT_TOKEN: str = Field(
|
||||
default="",
|
||||
description="@OpenClawAwoooI_Bot Token — 群組內代表 OpenClaw AI 發言",
|
||||
)
|
||||
NEMOTRON_BOT_TOKEN: str = Field(
|
||||
default="",
|
||||
description="@NemoTronAwoooI_Bot Token — 群組內代表 NemoClaw AI 發言",
|
||||
)
|
||||
SRE_GROUP_CHAT_ID: str = Field(
|
||||
default="",
|
||||
description="AwoooI SRE 戰情室群組 Chat ID",
|
||||
)
|
||||
|
||||
def get_tg_user_whitelist(self) -> list[int]:
|
||||
"""Parse comma-separated or JSON array user IDs to list[int]"""
|
||||
|
||||
@@ -1375,8 +1375,58 @@ class TelegramGateway:
|
||||
message_id=result.get("result", {}).get("message_id"),
|
||||
)
|
||||
|
||||
# 2026-04-03 ogt: 發到 SRE 群組並觸發 AI 雙向討論 (Triumvirate ADR-053)
|
||||
# 非同步執行,失敗不影響告警主流程
|
||||
if settings.SRE_GROUP_CHAT_ID:
|
||||
asyncio.create_task(
|
||||
self._send_approval_card_to_group(
|
||||
approval_id=approval_id,
|
||||
risk_level=risk_level,
|
||||
resource_name=resource_name,
|
||||
root_cause=root_cause,
|
||||
suggested_action=suggested_action,
|
||||
)
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
async def _send_approval_card_to_group(
    self,
    approval_id: str,
    risk_level: str,
    resource_name: str,
    root_cause: str,
    suggested_action: str,
) -> None:
    """
    Post an alert summary to the SRE group chat and start the AI discussion.

    Best-effort helper: every exception is caught and logged here so that a
    failure in the group flow never propagates to the code that scheduled it.

    Args:
        approval_id: Identifier of the approval request shown on the card.
        risk_level: Risk label; "critical", "medium" and "low" get a coloured
            emoji, any other value gets the neutral one.
        resource_name: Name of the affected resource.
        root_cause: Root-cause text for the alert.
        suggested_action: Suggested remediation text.
    """
    import html  # noqa: PLC0415

    try:
        risk_emoji = {"critical": "🔴", "medium": "🟡", "low": "🟢"}.get(risk_level, "⚪")
        risk_label = risk_level.upper()

        # send_to_group() sends with parse_mode="HTML" by default, so every
        # interpolated value must have <, > and & escaped; otherwise Telegram
        # rejects the message and the alert never reaches the group.
        summary = (
            f"{risk_emoji} <b>[{html.escape(risk_label, quote=False)}] SRE 告警</b>\n\n"
            f"📦 資源: <code>{html.escape(resource_name, quote=False)}</code>\n"
            f"🔍 根因: {html.escape(root_cause, quote=False)}\n"
            f"💡 建議: {html.escape(suggested_action, quote=False)}\n"
            f"🆔 <code>{html.escape(approval_id, quote=False)}</code>"
        )
        group_result = await self.send_to_group(text=summary)
        group_msg_id = (
            group_result.get("result", {}).get("message_id")
            if group_result.get("ok")
            else None
        )

        if group_msg_id:
            # The AI prompt is plain text, not Telegram HTML, so it receives
            # the raw (unescaped) values.
            await self.trigger_group_ai_discussion(
                alert_message_id=group_msg_id,
                alert_summary=f"[{risk_label}] 資源: {resource_name}\n根因: {root_cause}\n建議: {suggested_action}",
            )
    except Exception as e:
        # Deliberate catch-all boundary: log and swallow.
        logger.error("send_approval_card_to_group_failed", error=str(e))
|
||||
|
||||
# =========================================================================
|
||||
# 新訊息發送方法 (2026-03-29 ogt: ADR-038)
|
||||
# =========================================================================
|
||||
@@ -2470,6 +2520,220 @@ class TelegramGateway:
|
||||
|
||||
return await self._send_request("sendMessage", payload)
|
||||
|
||||
# =========================================================================
|
||||
# 2026-04-03 ogt: SRE 戰情室群組三頭政治 (Triumvirate) — ADR-053
|
||||
# @tsenyangbot 發告警卡片到群組,OpenClaw/NemoClaw Bot 各自回覆分析
|
||||
# =========================================================================
|
||||
|
||||
async def send_to_group(
    self,
    text: str,
    parse_mode: str = "HTML",
    reply_markup: dict | None = None,
) -> dict:
    """
    Send a message to the SRE group chat (settings.SRE_GROUP_CHAT_ID).

    The request is issued through self._send_request, i.e. presumably as the
    gateway's own bot (@tsenyangbot per the original note; confirm).

    Args:
        text: Message body; only the first 4096 characters are sent.
        parse_mode: Telegram parse mode.
        reply_markup: Optional reply markup (buttons).

    Returns:
        dict: The sendMessage response, or an empty dict when no group chat
        id is configured.
    """
    group_chat_id = settings.SRE_GROUP_CHAT_ID
    if not group_chat_id:
        logger.warning("send_to_group_skipped", reason="SRE_GROUP_CHAT_ID not configured")
        return {}

    request_body: dict = {
        "chat_id": group_chat_id,
        "text": text[:4096],
        "parse_mode": parse_mode,
    }
    # Attach the markup only when one was supplied.
    if reply_markup:
        request_body = {**request_body, "reply_markup": reply_markup}

    return await self._send_request("sendMessage", request_body)
|
||||
|
||||
async def _send_as_bot(
|
||||
self,
|
||||
token: str,
|
||||
chat_id: str,
|
||||
text: str,
|
||||
reply_to_message_id: int | None = None,
|
||||
parse_mode: str = "HTML",
|
||||
) -> dict:
|
||||
"""
|
||||
用指定 Bot Token 發訊息(不走 self._http_client,獨立建立請求)
|
||||
|
||||
Args:
|
||||
token: Bot Token
|
||||
chat_id: 群組 Chat ID
|
||||
text: 訊息內容
|
||||
reply_to_message_id: 回覆哪則訊息的 message_id
|
||||
parse_mode: 解析模式
|
||||
|
||||
Returns:
|
||||
dict: Telegram API 回應
|
||||
"""
|
||||
if not self._http_client:
|
||||
raise TelegramGatewayError("HTTP client not initialized")
|
||||
|
||||
url = f"{self.TELEGRAM_API_BASE}/bot{token}/sendMessage"
|
||||
payload: dict = {
|
||||
"chat_id": chat_id,
|
||||
"text": text[:4096],
|
||||
"parse_mode": parse_mode,
|
||||
}
|
||||
if reply_to_message_id:
|
||||
payload["reply_to_message_id"] = reply_to_message_id
|
||||
|
||||
response = await self._http_client.post(url, json=payload)
|
||||
response.raise_for_status()
|
||||
return response.json()
|
||||
|
||||
async def send_as_openclaw(
    self,
    text: str,
    reply_to_message_id: int | None = None,
) -> dict:
    """
    Post a message to the SRE group using the OpenClaw bot token.

    Args:
        text: Message body.
        reply_to_message_id: Optional message id to reply to.

    Returns:
        dict: The response from _send_as_bot, or an empty dict when either
        OPENCLAW_BOT_TOKEN or SRE_GROUP_CHAT_ID is not configured.
    """
    bot_token = settings.OPENCLAW_BOT_TOKEN
    group_chat_id = settings.SRE_GROUP_CHAT_ID

    if bot_token and group_chat_id:
        return await self._send_as_bot(
            token=bot_token,
            chat_id=group_chat_id,
            text=text,
            reply_to_message_id=reply_to_message_id,
        )

    # Missing configuration: skip silently apart from a warning.
    logger.warning("send_as_openclaw_skipped", reason="OPENCLAW_BOT_TOKEN or SRE_GROUP_CHAT_ID not configured")
    return {}
|
||||
|
||||
async def send_as_nemotron(
    self,
    text: str,
    reply_to_message_id: int | None = None,
) -> dict:
    """
    Post a message to the SRE group using the NemoTron bot token.

    Args:
        text: Message body.
        reply_to_message_id: Optional message id to reply to.

    Returns:
        dict: The response from _send_as_bot, or an empty dict when either
        NEMOTRON_BOT_TOKEN or SRE_GROUP_CHAT_ID is not configured.
    """
    bot_token = settings.NEMOTRON_BOT_TOKEN
    group_chat_id = settings.SRE_GROUP_CHAT_ID

    if bot_token and group_chat_id:
        return await self._send_as_bot(
            token=bot_token,
            chat_id=group_chat_id,
            text=text,
            reply_to_message_id=reply_to_message_id,
        )

    # Missing configuration: skip silently apart from a warning.
    logger.warning("send_as_nemotron_skipped", reason="NEMOTRON_BOT_TOKEN or SRE_GROUP_CHAT_ID not configured")
    return {}
|
||||
|
||||
async def trigger_group_ai_discussion(
    self,
    alert_message_id: int,
    alert_summary: str,
) -> None:
    """
    Run a single-round, two-bot AI discussion under an alert in the SRE group.

    Flow:
        1. Ask OpenClaw (ChatManager._call_openclaw) for an RCA and post it
           via send_as_openclaw as a reply to the alert message.
        2. Ask NemoTron (ChatManager._call_nemotron) for a complementary view
           and post it via send_as_nemotron as a reply to OpenClaw's message,
           falling back to the alert message when that id is unavailable.
        3. Stop. There is exactly one round, so the bots cannot loop.

    Best-effort: import failures, empty AI answers and any exception are
    logged and swallowed; nothing is raised to the caller.

    Args:
        alert_message_id: message_id of the alert in the group chat.
        alert_summary: Plain-text alert summary handed to both models.
    """
    import html  # noqa: PLC0415

    # The module path differs between deployment layouts; try both.
    try:
        from apps.api.src.services.chat_manager import ChatManager  # noqa: PLC0415
    except ImportError:
        try:
            from src.services.chat_manager import ChatManager  # noqa: PLC0415
        except ImportError:
            logger.error("trigger_group_ai_discussion_failed", reason="Cannot import ChatManager")
            return

    try:
        chat_mgr = ChatManager()

        # Step 1: OpenClaw analyses the alert.
        openclaw_prompt = (
            "你是 OpenClaw,AWOOOI SRE 戰情室的首席 AI 分析師。\n"
            "以下是一則基礎設施告警,請進行 RCA 根因分析並給出 3 點建議行動:\n\n"
            f"{alert_summary}"
        )
        openclaw_analysis = await chat_mgr._call_openclaw(
            system_prompt="你是 OpenClaw,AWOOOI SRE 戰情室首席 AI,精通 K8s、Prometheus、告警分析。",
            user_message=openclaw_prompt,
        )

        if not openclaw_analysis:
            logger.warning("trigger_group_ai_discussion_openclaw_empty")
            return

        # The bots send with parse_mode="HTML" (the _send_as_bot default).
        # Model output is free text and may contain <, > or &, so escape it
        # before embedding; otherwise Telegram rejects the message.
        openclaw_text = (
            f"🔍 <b>OpenClaw 分析</b>\n\n{html.escape(str(openclaw_analysis), quote=False)}"
        )
        openclaw_result = await self.send_as_openclaw(
            text=openclaw_text,
            reply_to_message_id=alert_message_id,
        )

        openclaw_msg_id = (
            openclaw_result.get("result", {}).get("message_id")
            if openclaw_result.get("ok")
            else None
        )

        logger.info("group_ai_discussion_openclaw_sent", message_id=openclaw_msg_id)

        # Step 2: NemoClaw comments on OpenClaw's analysis. The prompt is
        # plain text for the model, so it uses the raw, unescaped analysis.
        nemo_prompt = (
            "你是 NemoClaw,AWOOOI SRE 戰情室的 NemoClaw AI。\n"
            "OpenClaw 剛剛對以下告警做了分析:\n\n"
            f"【原始告警】\n{alert_summary}\n\n"
            f"【OpenClaw 分析】\n{openclaw_analysis}\n\n"
            "請從不同角度補充你的觀點,並指出任何可能被忽略的風險點。"
        )
        nemo_analysis = await chat_mgr._call_nemotron(
            system_prompt="你是 NemoClaw,AWOOOI SRE 戰情室 AI,擅長補充分析與風險評估。",
            user_message=nemo_prompt,
        )

        if not nemo_analysis:
            logger.warning("trigger_group_ai_discussion_nemo_empty")
            return

        nemo_text = (
            f"🤖 <b>NemoClaw 補充</b>\n\n{html.escape(str(nemo_analysis), quote=False)}"
        )
        await self.send_as_nemotron(
            text=nemo_text,
            # Thread under OpenClaw's reply when it was delivered.
            reply_to_message_id=openclaw_msg_id or alert_message_id,
        )

        logger.info("group_ai_discussion_completed", alert_message_id=alert_message_id)

    except Exception as e:
        # Deliberate catch-all boundary: a failed group discussion is logged
        # and swallowed.
        logger.error("trigger_group_ai_discussion_failed", error=str(e))
|
||||
|
||||
async def close(self) -> None:
|
||||
"""關閉 Gateway"""
|
||||
# 停止 Long Polling 與 Leader 相關 Tasks
|
||||
|
||||
60
k8s/awoooi-prod/03-secrets.yaml
Normal file
60
k8s/awoooi-prod/03-secrets.yaml
Normal file
@@ -0,0 +1,60 @@
|
||||
# AWOOOI 正式環境 Secrets 模板
|
||||
# 負責人: CIO / CISO
|
||||
# 版本: v1.0
|
||||
# 日期: 2026-03-20
|
||||
#
|
||||
# ⚠️ 注意: 此檔案為模板,實際值由 CI/CD 或手動注入
|
||||
# 實際 Secret 值不應提交到 Git
|
||||
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: awoooi-secrets
|
||||
namespace: awoooi-prod
|
||||
type: Opaque
|
||||
stringData:
|
||||
# 資料庫連線 (實際值請替換)
|
||||
# 重要: 必須使用 +asyncpg 驅動 (2026-03-28 K-HA 遷移確認)
|
||||
DATABASE_URL: "postgresql+asyncpg://awoooi:CHANGE_ME@192.168.0.188:5432/awoooi_prod"
|
||||
|
||||
# Redis 連線
|
||||
REDIS_URL: "redis://192.168.0.188:6380/10"
|
||||
|
||||
# JWT 認證
|
||||
JWT_SECRET: "CHANGE_ME_TO_RANDOM_STRING"
|
||||
JWT_ALGORITHM: "HS256"
|
||||
|
||||
# AI 服務 (雲端備援) - ADR-006 v1.3 + ADR-036
|
||||
GEMINI_API_KEY: "CHANGE_ME"
|
||||
CLAUDE_API_KEY: "CHANGE_ME"
|
||||
# 2026-03-29 ogt: ADR-036 Nemotron Tool Calling (83% 精準度)
|
||||
NVIDIA_API_KEY: "CHANGE_ME"
|
||||
|
||||
# 通知服務
|
||||
SMTP_HOST: "smtp.example.com"
|
||||
SMTP_USER: "CHANGE_ME"
|
||||
SMTP_PASSWORD: "CHANGE_ME"
|
||||
|
||||
# Phase 5.5: Telegram Gateway (OpenClaw)
|
||||
OPENCLAW_TG_BOT_TOKEN: "CHANGE_ME"
|
||||
OPENCLAW_TG_CHAT_ID: "CHANGE_ME"
|
||||
OPENCLAW_TG_USER_WHITELIST: "CHANGE_ME"
|
||||
|
||||
# 2026-04-03 ogt: SRE 戰情室群組三頭政治 (Triumvirate ADR-053)
|
||||
# 實際值由 CD 注入 (kubectl patch secret),此處為佔位
|
||||
OPENCLAW_BOT_TOKEN: "CHANGE_ME"
|
||||
NEMOTRON_BOT_TOKEN: "CHANGE_ME"
|
||||
|
||||
# Webhook 安全 (CISO 要求)
|
||||
WEBHOOK_HMAC_SECRET: "CHANGE_ME_TO_RANDOM_64_CHARS"
|
||||
|
||||
# ============================================================================
|
||||
# Phase 10: Sentry Self-Hosted (192.168.0.110:9000)
|
||||
# 2026-03-27: 首席架構師審查 - 補齊遺漏配置
|
||||
# DSN 格式: http://{public_key}@{host}:{port}/{project_id}
|
||||
# ============================================================================
|
||||
SENTRY_DSN: "CHANGE_ME"
|
||||
# 2026-03-29 ogt: ADR-037 - Comment 回寫需要 Auth Token
|
||||
# 取得方式: Sentry UI → Settings → Auth Tokens → Create New Token
|
||||
# 權限: event:admin, project:read, project:write
|
||||
SENTRY_AUTH_TOKEN: "CHANGE_ME"
|
||||
@@ -71,6 +71,12 @@ data:
|
||||
# Phase 22.6: 統帥需要直接在同一 Bot 與 OpenClaw/NemoClaw 雙 AI 對話
|
||||
TELEGRAM_ENABLE_POLLING: "true"
|
||||
|
||||
# ============================================================================
|
||||
# 2026-04-03 ogt: SRE 戰情室群組三頭政治 (Triumvirate ADR-053)
|
||||
# OPENCLAW_BOT_TOKEN / NEMOTRON_BOT_TOKEN 在 Secrets 中配置
|
||||
# ============================================================================
|
||||
SRE_GROUP_CHAT_ID: "-1003711974679"
|
||||
|
||||
# 快取 TTL (秒)
|
||||
CACHE_TTL_DASHBOARD: "300"
|
||||
CACHE_TTL_HOST_STATUS: "30"
|
||||
|
||||
Reference in New Issue
Block a user