diff --git a/requirements.txt b/requirements.txt index b6accad..25276ad 100644 --- a/requirements.txt +++ b/requirements.txt @@ -26,4 +26,5 @@ pgvector>=0.2 paramiko # ADR-013: AIOps SSH 跳板修復 python-pptx # ADR-014: PPT 簡報系統 matplotlib # 圖表生成(日報/週報/月報) -matplotlib-inline # Jupyter 相容層(可選) \ No newline at end of file +matplotlib-inline # Jupyter 相容層(可選) +psutil>=5.9 # ADR-019 Phase 2: ElephantAlpha system load 真實量測(production 必裝;缺失時 fallback 為 queue-based 估算) \ No newline at end of file diff --git a/services/elephant_alpha_autonomous_engine.py b/services/elephant_alpha_autonomous_engine.py index 8a8e211..d6300bf 100644 --- a/services/elephant_alpha_autonomous_engine.py +++ b/services/elephant_alpha_autonomous_engine.py @@ -977,6 +977,11 @@ class ElephantAlphaAutonomousEngine: session.close() def _get_system_load_percentage(self) -> float: + # ADR-019 Phase 2 / critic post-review BLOCKER #2: + # production 必裝 psutil(已加入 requirements.txt);ImportError fallback 僅作 + # defensive,使用 queue size 估算(pending=14→70%、pending≥18→飽和 90%)。 + # 該 fallback 與真實 CPU 無關,僅避免 dev 環境炸開;若 prod 觸發即代表 + # requirements.txt 與容器映像同步漏裝,需立刻補裝 psutil。 try: import psutil return float(psutil.cpu_percent(interval=0.1)) diff --git a/services/openclaw_strategist_service.py b/services/openclaw_strategist_service.py index 2417a0f..30a9f50 100644 --- a/services/openclaw_strategist_service.py +++ b/services/openclaw_strategist_service.py @@ -109,7 +109,20 @@ def _fetch_sales_summary(days: int = 14) -> Dict[str, Any]: max_date = max_date_row[0] if max_date_row and max_date_row[0] else None if not max_date or max_date < (datetime.now().date() - timedelta(days=2)): - return {"stale": True, "last_date": str(max_date) if max_date else "None"} + # ADR-019 Phase 2 / critic post-review BLOCKER #9: + # stale 分支必須 return 完整 shape,避免沒套 stale gate 的上游 caller + # 拿到 0 而靜默產出「NT$0 業績」報告。 + # 數值欄位用 None(而非 0),讓 prompt template 的 `:,.0f` 在誤用時 + # raise TypeError → 比靜默 0 更明顯,迫使呼叫端必須 `if sales.get("stale")` 擋下。 + return { + 
def _send_data_stale_alert(report_type: str, last_date: str, period: str) -> bool:
    """Send a "sales data is stale" Telegram alert, de-duplicated per report type.

    Shared by the daily / weekly / monthly report paths: each of them calls this
    when it detects missing or outdated ``daily_sales_snapshot`` data and then
    skips producing its report.

    De-duplication: ``ai_insights`` is searched for a ``data_stale_alert`` row
    created in the last 24 hours for the same ``report_type`` whose alert was
    actually delivered (``telegram_sent`` is true).  Only a delivered alert
    suppresses a new one; a row left behind by a failed Telegram send does not,
    so a transient Telegram outage cannot silence the alert for a whole day.
    Note that the key is ``report_type``: different report types alert
    independently of each other.

    Every attempt (delivered or not) is recorded in ``ai_insights`` as an audit
    trail, with the delivery outcome stored in ``metadata.telegram_sent``.

    Args:
        report_type: Report identifier, e.g. "daily_report", "weekly_strategy"
            or "monthly_report".
        last_date: Last known ``daily_sales_snapshot`` date, as a string.
        period: The period string of the report that is being skipped; only
            used to make the alert message readable.

    Returns:
        True if the alert was delivered now, or was already delivered for this
        report type within the last 24 hours.  False if the Telegram send
        failed (regardless of whether the audit row could be written).
    """
    # Step 1: dedupe lookup.  Any failure here -- including failing to obtain a
    # session -- must not block the alert: a duplicate is better than a miss.
    session = None
    try:
        session = get_session()
        # NOTE(review): the telegram_sent filter assumes _save_to_ai_insights
        # stores `metadata` as JSON, so Python True is read back by `->>` as the
        # text 'true' -- confirm.  If that assumption is wrong the filter never
        # matches, which degrades to "always alert" rather than "never alert".
        dedupe_row = session.execute(text("""
            SELECT id FROM ai_insights
            WHERE insight_type = 'data_stale_alert'
              AND created_by = 'openclaw'
              AND created_at >= NOW() - INTERVAL '24 hours'
              AND metadata_json->>'report_type' = :rt
              AND LOWER(metadata_json->>'telegram_sent') = 'true'
            ORDER BY created_at DESC
            LIMIT 1
        """), {"rt": report_type}).fetchone()
    except Exception as e:
        logger.warning("[OpenClaw] data_stale_alert dedupe 查詢失敗 rt=%s: %s", report_type, e)
        dedupe_row = None
    finally:
        if session is not None:
            session.close()

    if dedupe_row:
        logger.info(
            "[OpenClaw] data_stale_alert 已於 24h 內送過,跳過重複告警 rt=%s last_date=%s",
            report_type, last_date,
        )
        return True

    # Step 2: send the Telegram message.
    msg = (
        f"⚠️ [資料停更告警] {report_type}\n"
        f"daily_sales_snapshot 最後更新:{last_date}\n"
        f"原訂報告期間:{period}\n"
        f"請檢查人工上傳流程;本告警已自動跳過該報告產出。"
    )
    sent_ok = False
    try:
        # Imported lazily, as in the original code (presumably to avoid an
        # import cycle with the templates module -- not verifiable from here).
        from services.telegram_templates import _send_telegram_raw
        _send_telegram_raw(msg)
        sent_ok = True
    except Exception as e:
        logger.error("[OpenClaw] data_stale_alert telegram 發送失敗 rt=%s: %s", report_type, e)

    # Step 3: record the attempt.  Written even when Telegram failed so the
    # failure is traceable; such rows are ignored by the dedupe query above.
    try:
        _save_to_ai_insights(
            insight_type="data_stale_alert",
            content=msg,
            confidence=1.0,
            metadata={
                "report_type": report_type,
                "last_date": last_date,
                "period": period,
                "telegram_sent": sent_ok,
            },
            period=datetime.now().strftime("%Y-%m-%d"),
        )
    except Exception as e:
        logger.warning("[OpenClaw] data_stale_alert ai_insights 寫入失敗 rt=%s: %s", report_type, e)

    return sent_ok
_send_data_stale_alert( + report_type="daily_report", + last_date=last_date, + period=period, + ) + return { + "status": "skipped", + "report_type": "daily_report", + "reason": "data_stale", + "last_date": last_date, + "period": period, + } + threats = _fetch_top_threats(5) recommendations = _fetch_top_recommendations(5) competitor_summary = _fetch_competitor_summary() @@ -1168,6 +1287,30 @@ def generate_monthly_report() -> dict: # ── Step 1:DB 數據收集(上月完整數據)───────────────────────────────── days_in_month = (first_of_this_month - last_month_start).days sales = _fetch_monthly_sales_summary(last_month_start, last_month_end) + + # critic post-review BLOCKER #1:monthly 路徑同樣加 stale gate。 + # _fetch_monthly_sales_summary 沒有內建 stale 檢查(查固定日期區間), + # 若上月 daily_sales_snapshot 完全沒匯入則 revenue=0、sku_count=0, + # 會產出「NT$0 月報」誤導決策。以「revenue=0 且 sku=0」當資料缺失訊號。 + if (sales.get("revenue", 0) or 0) == 0 and (sales.get("sku_count", 0) or 0) == 0: + last_date_str = last_month_end.strftime("%Y-%m-%d") + logger.warning( + "[OpenClaw] 月報任務跳過:上月 daily_sales_snapshot 無資料 period=%s", + period, + ) + _send_data_stale_alert( + report_type="monthly_report", + last_date=last_date_str, + period=period, + ) + return { + "status": "skipped", + "report_type": "monthly_report", + "reason": "data_stale", + "last_date": last_date_str, + "period": period, + } + categories = _fetch_category_breakdown(days_in_month) threats = _fetch_top_threats(10) competitor_summary = _fetch_competitor_summary()