fix: stop runaway EA Telegram spam (cooldown + API key detection + dedup)
All checks were successful
CD Pipeline / deploy (push) Successful in 1m20s

Root cause: OPENROUTER_API_KEY not set → fallback confidence=0.60 →
always below threshold → _escalate_to_human() every 60s loop → infinite
Telegram messages, all meaningless.

Three-layer fix:
1. API Key detection: if fallback_decision triggered (reasoning contains
   "Elephant Alpha unavailable"), silently skip — no Telegram, no cost,
   update last_triggered to prevent infinite retry
2. Per-trigger cooldown in _check_triggers():
   price_drop_alert 30min / market_opportunity 60min /
   threat_escalation 15min / resource_optimization 60min
3. Escalation dedup in _escalate_to_human(): _last_escalated[] tracks
   last Telegram send time per trigger type; suppresses within cooldown

Valid HITL escalations (when EA is actually online) still work correctly.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
ogt
2026-04-20 05:34:21 +08:00
parent d8d1f3dee8
commit 61496af2c5

View File

@@ -81,6 +81,15 @@ class ElephantAlphaAutonomousEngine:
ADR-013: resource_optimization → auto_heal_service
"""
# 各 trigger 的 escalation cooldown分鐘
ESCALATION_COOLDOWN: Dict[str, int] = {
"price_drop_alert": 30, # 同一類型 30 分鐘只發一次
"market_opportunity": 60,
"threat_escalation": 15,
"resource_optimization": 60,
}
DEFAULT_COOLDOWN_MIN = 30
def __init__(self):
self.decision_history: List[DecisionOutcome] = [] # 最近 100 筆快取;持久化到 DB
self.triggers: List[AutonomousTrigger] = []
@@ -95,6 +104,9 @@ class ElephantAlphaAutonomousEngine:
self.max_hourly_cost_usd = 5.0 # $5/hr 硬上限
self._cost_per_ea_call = 0.002 # ~100B 模型每次約 $0.002 (1K tokens)
# Escalation dedup記錄每個 trigger 最後一次 escalate 時間
self._last_escalated: Dict[str, datetime] = {}
self._initialize_triggers()
def _initialize_triggers(self):
@@ -151,6 +163,18 @@ class ElephantAlphaAutonomousEngine:
for trigger in self.triggers:
if not trigger.enabled:
continue
# Cooldown guard同一 trigger 在 cooldown 期間內不重複執行
cooldown_min = self.ESCALATION_COOLDOWN.get(
trigger.trigger_type, self.DEFAULT_COOLDOWN_MIN
)
if trigger.last_triggered:
elapsed = (datetime.now() - trigger.last_triggered).total_seconds() / 60
if elapsed < cooldown_min:
logger.debug(
f"[ElephantAlpha] Trigger {trigger.trigger_type} in cooldown "
f"({elapsed:.1f}/{cooldown_min} min), skip"
)
continue
if await self._evaluate_trigger(trigger):
await self._execute_autonomous_decision(trigger)
@@ -254,10 +278,23 @@ class ElephantAlphaAutonomousEngine:
# ADR-013: resource_optimization → AIOps autoheal loop
if trigger.trigger_type == "resource_optimization":
await self._handle_resource_via_autoheal(trigger)
trigger.last_triggered = datetime.now()
return
context = await self._build_trigger_context(trigger)
decision = await elephant_orchestrator.analyze_and_coordinate(context)
# ── API Key 未設定時 fallback decision 的偵測 ──────────────
# fallback_decision 的 reasoning 固定字串,偵測到就靜默跳過,
# 不發 Telegram、不計費、只 log warning。
if "Elephant Alpha unavailable" in (decision.reasoning or ""):
logger.warning(
f"[ElephantAlpha] API Key 未設定trigger={trigger.trigger_type} "
"跳過本次決策(不發 Telegram。請在 momo-scheduler 環境變數注入 OPENROUTER_API_KEY。"
)
trigger.last_triggered = datetime.now() # 仍更新以避免無限重試
return
self.hourly_cost_usd += self._cost_per_ea_call
# 護欄 1: price 行動信心閾值 0.85
@@ -275,7 +312,8 @@ class ElephantAlphaAutonomousEngine:
# W2-B: ADR-012 §⑤ — 執行完畢後強制發 Telegram audit trail
await self._notify_telegram_executed(decision, trigger)
else:
# W2-A: ADR-012 §⑤ — 升級人工 + 強制發 Telegram
# W2-A: ADR-012 §⑤ — 升級人工 + Telegram(有 dedup 保護)
trigger.last_triggered = datetime.now()
await self._escalate_to_human(decision, trigger)
async def _handle_resource_via_autoheal(self, trigger: AutonomousTrigger):
@@ -534,13 +572,24 @@ class ElephantAlphaAutonomousEngine:
W2-A: ADR-012 §⑤ — 信心不足時雙寫:
1. ai_insights (DB)
2. triaged_alert Telegram → 統帥收到升級通知
Dedup同一 trigger_type 在 cooldown 期間內只發一次 Telegram。
"""
logger.warning(
f"[ElephantAlpha] Escalating to human: {trigger.trigger_type} "
f"confidence={decision.confidence:.2f}"
)
# 1. DB write
# Dedup check同一 trigger 在 cooldown 期間內只寫 DB + 發一次 Telegram
cooldown_min = self.ESCALATION_COOLDOWN.get(
trigger.trigger_type, self.DEFAULT_COOLDOWN_MIN
)
last_esc = self._last_escalated.get(trigger.trigger_type)
telegram_allowed = (
last_esc is None
or (datetime.now() - last_esc).total_seconds() / 60 >= cooldown_min
)
# 1. DB write每次都記錄不 dedup
session = get_session()
try:
session.execute(text("""
@@ -569,7 +618,15 @@ class ElephantAlphaAutonomousEngine:
finally:
session.close()
# 2. Telegram (ADR-012 §⑤ 強制)
# 2. Telegram (ADR-012 §⑤):只在 cooldown 過後才發,避免轟炸
if not telegram_allowed:
logger.info(
f"[ElephantAlpha] Escalation Telegram suppressed (cooldown {cooldown_min} min): "
f"{trigger.trigger_type}"
)
return
self._last_escalated[trigger.trigger_type] = datetime.now()
try:
from services.telegram_templates import triaged_alert, _send_telegram_raw
msg, keyboard = triaged_alert(