fix(strategist): plug stale gate gaps in daily/monthly + psutil dep + complete stale shape (critic post-review)
critic post-review 對 commit 9158bbe 找到 3 個 BLOCKER,本 patch 補完。 #1 daily/monthly 路徑沒套 stale gate(Critical) - generate_daily_report() 在 _fetch_sales_summary(7) 後立刻檢查 sales.get("stale"), 避免 daily_sales_snapshot 過期時每天 09:00 復發 NT$0 偽日報 - generate_monthly_report() 在 _fetch_monthly_sales_summary 後檢查 revenue==0 and sku_count==0(_fetch_monthly_sales_summary 沒內建 stale 機制, 以「無資料」當訊號),避免月初產出 NT$0 偽月報 - 抽出 _send_data_stale_alert(report_type, last_date, period) 統一三條路徑共用, 以 ai_insights 表查近 24h 同 report_type 的 data_stale_alert 紀錄做 dedupe, 避免每天 daily/weekly 三份報告同時觸發 → 一天送多次告警噪音 - weekly stale 分支改用 _send_data_stale_alert(return shape 維持 status="error" 不動,保 9158bbe weekly dedupe/cache 機制下游語意) - daily/monthly 採 critic 建議的 status="skipped";scheduler 既有 task 不檢查 status 也不 raise,EventRouter 不會被誤觸 #2 psutil 沒在 requirements.txt(Critical) - requirements.txt 加 psutil>=5.9(ADR-019 Phase 2 要求 production 必裝) - elephant_alpha_autonomous_engine._get_system_load_percentage 加註解: ImportError fallback 是 queue-based 估算(pending=14→70%、≥18→90%), 與真實 CPU 無關,僅 dev defensive;prod 觸發即代表容器映像漏裝 #9 _fetch_sales_summary stale 分支 return shape 不完整(High) - stale 分支補完 daily/current_7d_revenue/prev_7d_revenue/wow_pct/sku_count - 數值欄位用 None(非 0):未套 stale gate 的上游 caller 在 prompt template `:,.0f` 會 raise TypeError,比靜默 0 明顯,迫使呼叫端必須 stale gate 三問自審: - 方案正確:daily 不再發 NT$0(_fetch_sales_summary 後立刻 gate);psutil 會被 pip install;stale shape None 在所有 caller 路徑要嘛被 gate 擋下要嘛 raise - 影響完整:grep 確認 _fetch_sales_summary 三 caller(weekly/daily/monthly via _fetch_monthly_sales_summary)皆已加 gate;無下游依賴 data_stale return 字串 - Regression 風險:stale shape 改 None 為純擴充無 caller 取數值欄位;scheduler 只讀 period/chart_count/action_count 不檢查 status;monthly revenue=0 gate 在實務 P0 異常時告警,比靜默產 NT$0 月報可接受 不在本 patch scope(critic 任務描述明示禁動): - 9158bbe weekly dedupe/cache 機制(_acquire_weekly_strategy_send_lock 等) - elephant_alpha Phase 1 已修部分(trigger/dispatch/method raise) - run_scheduler.py 排程設定 - tests/ 內任何測試 Co-Authored-By: Claude Opus 4.7 (1M context)
<noreply@anthropic.com>
This commit is contained in:
@@ -26,4 +26,5 @@ pgvector>=0.2
|
||||
paramiko # ADR-013: AIOps SSH 跳板修復
|
||||
python-pptx # ADR-014: PPT 簡報系統
|
||||
matplotlib # 圖表生成(日報/週報/月報)
|
||||
matplotlib-inline # Jupyter 相容層(可選)
|
||||
matplotlib-inline # Jupyter 相容層(可選)
|
||||
psutil>=5.9 # ADR-019 Phase 2: ElephantAlpha system load 真實量測(production 必裝;缺失時 fallback 為 queue-based 估算)
|
||||
@@ -977,6 +977,11 @@ class ElephantAlphaAutonomousEngine:
|
||||
session.close()
|
||||
|
||||
def _get_system_load_percentage(self) -> float:
|
||||
# ADR-019 Phase 2 / critic post-review BLOCKER #2:
|
||||
# production 必裝 psutil(已加入 requirements.txt);ImportError fallback 僅作
|
||||
# defensive,使用 queue size 估算(pending=14→70%、pending≥18→飽和 90%)。
|
||||
# 該 fallback 與真實 CPU 無關,僅避免 dev 環境炸開;若 prod 觸發即代表
|
||||
# requirements.txt 與容器映像同步漏裝,需立刻補裝 psutil。
|
||||
try:
|
||||
import psutil
|
||||
return float(psutil.cpu_percent(interval=0.1))
|
||||
|
||||
@@ -109,7 +109,20 @@ def _fetch_sales_summary(days: int = 14) -> Dict[str, Any]:
|
||||
max_date = max_date_row[0] if max_date_row and max_date_row[0] else None
|
||||
|
||||
if not max_date or max_date < (datetime.now().date() - timedelta(days=2)):
|
||||
return {"stale": True, "last_date": str(max_date) if max_date else "None"}
|
||||
# ADR-019 Phase 2 / critic post-review BLOCKER #9:
|
||||
# stale 分支必須 return 完整 shape,避免沒套 stale gate 的上游 caller
|
||||
# 拿到 0 而靜默產出「NT$0 業績」報告。
|
||||
# 數值欄位用 None(而非 0),讓 prompt template 的 `:,.0f` 在誤用時
|
||||
# raise TypeError → 比靜默 0 更明顯,迫使呼叫端必須 `if sales.get("stale")` 擋下。
|
||||
return {
|
||||
"stale": True,
|
||||
"last_date": str(max_date) if max_date else "None",
|
||||
"daily": [],
|
||||
"current_7d_revenue": None,
|
||||
"prev_7d_revenue": None,
|
||||
"wow_pct": None,
|
||||
"sku_count": None,
|
||||
}
|
||||
|
||||
rows = session.execute(text("""
|
||||
SELECT
|
||||
@@ -308,6 +321,88 @@ def _save_to_ai_insights(
|
||||
session.close()
|
||||
|
||||
|
||||
def _send_data_stale_alert(report_type: str, last_date: str, period: str) -> bool:
    """Send a "sales data is stale" alert, shared by daily/weekly/monthly reports.

    Called by a report path when it detects that its sales data is stale or
    missing, so that the report is skipped with a notification instead of a
    misleading NT$0 report being produced, and so that several reports
    detecting the same condition do not each page the operators.

    Dedupe: the ``ai_insights`` table is checked for a ``data_stale_alert``
    row with the same ``report_type`` created in the last 24 hours whose
    Telegram delivery succeeded. Rows recording a *failed* delivery are kept
    as an audit trail but deliberately do not count for dedupe; otherwise a
    single failed send would silence the alert for a full day.

    Args:
        report_type: Report identifier stored in the alert metadata and used
            as the dedupe key, e.g. "daily_report", "weekly_strategy",
            "monthly_report".
        last_date: Last known data date as a string; shown in the alert.
        period: The period string of the report that was skipped; shown in
            the alert for readability.

    Returns:
        True if the alert was delivered to Telegram by this call, or if a
        delivered alert for the same ``report_type`` already exists within
        the last 24 hours. False if the Telegram send failed (regardless of
        whether the audit row could be written).
    """
    # Step 1: dedupe lookup. Any failure here (including obtaining the
    # session) must not block the alert: a duplicate is better than a miss.
    dedupe_row = None
    session = None
    try:
        session = get_session()
        # NOTE(review): assumes _save_to_ai_insights persists its `metadata`
        # argument into the metadata_json column; the report_type filter
        # already relied on that. COALESCE treats rows without a
        # telegram_sent key as delivered.
        dedupe_row = session.execute(text("""
            SELECT id FROM ai_insights
            WHERE insight_type = 'data_stale_alert'
              AND created_by = 'openclaw'
              AND created_at >= NOW() - INTERVAL '24 hours'
              AND metadata_json->>'report_type' = :rt
              AND COALESCE(metadata_json->>'telegram_sent', 'true') = 'true'
            ORDER BY created_at DESC
            LIMIT 1
        """), {"rt": report_type}).fetchone()
    except Exception as e:
        logger.warning("[OpenClaw] data_stale_alert dedupe 查詢失敗 rt=%s: %s", report_type, e)
        dedupe_row = None
    finally:
        if session is not None:
            session.close()

    if dedupe_row:
        logger.info(
            "[OpenClaw] data_stale_alert 已於 24h 內送過,跳過重複告警 rt=%s last_date=%s",
            report_type, last_date,
        )
        return True

    # Step 2: send the Telegram message.
    msg = (
        f"⚠️ [資料停更告警] {report_type}\n"
        f"daily_sales_snapshot 最後更新:{last_date}\n"
        f"原訂報告期間:{period}\n"
        f"請檢查人工上傳流程;本告警已自動跳過該報告產出。"
    )
    sent_ok = False
    try:
        # Imported lazily, matching how the report paths import this helper.
        from services.telegram_templates import _send_telegram_raw
        _send_telegram_raw(msg)
        sent_ok = True
    except Exception as e:
        logger.error("[OpenClaw] data_stale_alert telegram 發送失敗 rt=%s: %s", report_type, e)

    # Step 3: always leave a trace in ai_insights. The telegram_sent flag is
    # what the dedupe query keys on, so a failed delivery is recorded for
    # auditing without suppressing the next attempt.
    try:
        _save_to_ai_insights(
            insight_type="data_stale_alert",
            content=msg,
            confidence=1.0,
            metadata={
                "report_type": report_type,
                "last_date": last_date,
                "period": period,
                "telegram_sent": sent_ok,
            },
            period=datetime.now().strftime("%Y-%m-%d"),
        )
    except Exception as e:
        logger.warning("[OpenClaw] data_stale_alert ai_insights 寫入失敗 rt=%s: %s", report_type, e)

    return sent_ok
|
||||
|
||||
|
||||
def _find_existing_weekly_strategy(
|
||||
period: str,
|
||||
sent_only: bool = False,
|
||||
@@ -779,12 +874,14 @@ def generate_weekly_strategy_report(
|
||||
# ── Step 1:DB 數據收集 ──────────────────────────────────────────────────
|
||||
sales = _fetch_sales_summary(14)
|
||||
if sales.get("stale"):
|
||||
msg = f"⚠️ [資料停更告警] daily_sales_snapshot 最後更新為 {sales.get('last_date')},請檢查人工上傳流程。"
|
||||
try:
|
||||
from services.telegram_templates import _send_telegram_raw
|
||||
_send_telegram_raw(msg)
|
||||
except Exception:
|
||||
pass
|
||||
# critic post-review BLOCKER #1:改用統一 _send_data_stale_alert(24h dedupe)
|
||||
# weekly return shape 維持不動(status="error"),避免動到 Phase 2 已建立的
|
||||
# weekly dedupe/cache 機制下游語意。
|
||||
_send_data_stale_alert(
|
||||
report_type="weekly_strategy",
|
||||
last_date=str(sales.get("last_date")),
|
||||
period=period,
|
||||
)
|
||||
return {"status": "error", "reason": "data_stale"}
|
||||
|
||||
threats = _fetch_top_threats(10)
|
||||
@@ -1011,6 +1108,28 @@ def generate_daily_report() -> dict:
|
||||
|
||||
# ── Step 1:DB 數據收集 ──────────────────────────────────────────────────
|
||||
sales = _fetch_sales_summary(7)
|
||||
|
||||
# critic post-review BLOCKER #1:daily 路徑必須與 weekly 對齊套 stale gate,
|
||||
# 否則 daily_sales_snapshot 過期時會發出 NT$0 的偽日報(每天 09:00 復發)。
|
||||
if sales.get("stale"):
|
||||
last_date = str(sales.get("last_date"))
|
||||
logger.warning(
|
||||
"[OpenClaw] 日報任務跳過:daily_sales_snapshot 已停更 last_date=%s period=%s",
|
||||
last_date, period,
|
||||
)
|
||||
_send_data_stale_alert(
|
||||
report_type="daily_report",
|
||||
last_date=last_date,
|
||||
period=period,
|
||||
)
|
||||
return {
|
||||
"status": "skipped",
|
||||
"report_type": "daily_report",
|
||||
"reason": "data_stale",
|
||||
"last_date": last_date,
|
||||
"period": period,
|
||||
}
|
||||
|
||||
threats = _fetch_top_threats(5)
|
||||
recommendations = _fetch_top_recommendations(5)
|
||||
competitor_summary = _fetch_competitor_summary()
|
||||
@@ -1168,6 +1287,30 @@ def generate_monthly_report() -> dict:
|
||||
# ── Step 1:DB 數據收集(上月完整數據)─────────────────────────────────
|
||||
days_in_month = (first_of_this_month - last_month_start).days
|
||||
sales = _fetch_monthly_sales_summary(last_month_start, last_month_end)
|
||||
|
||||
# critic post-review BLOCKER #1:monthly 路徑同樣加 stale gate。
|
||||
# _fetch_monthly_sales_summary 沒有內建 stale 檢查(查固定日期區間),
|
||||
# 若上月 daily_sales_snapshot 完全沒匯入則 revenue=0、sku_count=0,
|
||||
# 會產出「NT$0 月報」誤導決策。以「revenue=0 且 sku=0」當資料缺失訊號。
|
||||
if (sales.get("revenue", 0) or 0) == 0 and (sales.get("sku_count", 0) or 0) == 0:
|
||||
last_date_str = last_month_end.strftime("%Y-%m-%d")
|
||||
logger.warning(
|
||||
"[OpenClaw] 月報任務跳過:上月 daily_sales_snapshot 無資料 period=%s",
|
||||
period,
|
||||
)
|
||||
_send_data_stale_alert(
|
||||
report_type="monthly_report",
|
||||
last_date=last_date_str,
|
||||
period=period,
|
||||
)
|
||||
return {
|
||||
"status": "skipped",
|
||||
"report_type": "monthly_report",
|
||||
"reason": "data_stale",
|
||||
"last_date": last_date_str,
|
||||
"period": period,
|
||||
}
|
||||
|
||||
categories = _fetch_category_breakdown(days_in_month)
|
||||
threats = _fetch_top_threats(10)
|
||||
competitor_summary = _fetch_competitor_summary()
|
||||
|
||||
Reference in New Issue
Block a user