From a303b5ef917ef5bfa08f2532b9e926d006c64d44 Mon Sep 17 00:00:00 2001 From: OG T Date: Thu, 9 Apr 2026 23:38:57 +0800 Subject: [PATCH] =?UTF-8?q?feat(chat):=20NemoClaw=20=E6=94=B9=E6=8E=A5=20O?= =?UTF-8?q?llama=20111=20deepseek-r1:14b?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 2026-04-09 ogt: 棄用 Claude Haiku,改用本地 deepseek-r1:14b - 端點: http://192.168.0.111:11434 - 過濾 <think>...</think> 推理區塊,只回傳結論 - timeout 120s(14b 推理較慢) - 完全免費,不計入 Claude API 費用 Co-Authored-By: Claude Sonnet 4.6 --- apps/api/src/services/chat_manager.py | 74 +++++++++++---------------- 1 file changed, 31 insertions(+), 43 deletions(-) diff --git a/apps/api/src/services/chat_manager.py b/apps/api/src/services/chat_manager.py index 7ab341f0..976ef0ac 100644 --- a/apps/api/src/services/chat_manager.py +++ b/apps/api/src/services/chat_manager.py @@ -4,13 +4,15 @@ AWOOOI Chat Manager - 雙 AI 對話核心 Phase 21.5 初版: 2026-03-31 ogt Phase 22.6 重寫: 2026-04-03 ogt (老闆需求: 雙 AI 互動對話) Phase 22.7 更新: 2026-04-03 ogt (老闆指示: OpenClaw→Gemini, NemoClaw→Ollama llama3.2:3b) +Phase 22.8 更新: 2026-04-09 ogt (老闆指示: NemoClaw→Ollama 111 deepseek-r1:14b,SRE 推理更強) 架構: - OpenClaw (Gemini API): SRE 首席顧問,精準分析 -- NemoClaw (Ollama llama3.2:3b): 戰術參謀,快速補充 +- NemoClaw (Ollama 192.168.0.111 deepseek-r1:14b): 戰術參謀,深度推理 費用控管: - Gemini Flash: Input $0.075/1M tokens, Output $0.30/1M tokens +- NemoClaw: 免費 (本地 Ollama) - 每次回覆顯示 token 用量與費用 - 月上限 $10 USD (由 ai_rate_limiter 控管) """ @@ -29,7 +31,7 @@ OPENCLAW_PERSONA = """你是 OpenClaw,AWOOOI 平台的 SRE AI 首席顧問。 稱呼用戶為「老闆」。 """ -NEMOCLAW_PERSONA = """你是 NemoClaw,AWOOOI 平台的 AI 戰術參謀,由 NVIDIA Nemotron 驅動。 +NEMOCLAW_PERSONA = """你是 NemoClaw,AWOOOI 平台的 AI 戰術參謀,由 DeepSeek-R1 驅動。 個性: 分析型、從不同角度思考,會質疑假設。 語氣: 帶點挑釁但建設性。不超過 200 字。 稱呼用戶為「老闆」。評論 OpenClaw 的回應時,直接說「我補充」或「我有不同看法」。 @@ -144,64 +146,50 @@ class ChatManager: async def _call_nemotron(self, system_prompt: str, user_message: str) -> str | None: """ - 呼叫 NemoClaw 對話 — Claude API (claude-haiku-4-5) + 呼叫 NemoClaw 對話 — Ollama 111 
deepseek-r1:14b - 2026-04-03 ogt: 老闆指示改接 Claude API,快速且中文能力強 - 用 haiku 降低費用 + 2026-04-09 ogt: 改接 192.168.0.111 Ollama deepseek-r1:14b,SRE 推理能力最強 + deepseek-r1 含 <think> 標籤,需過濾後才回傳 """ import httpx - from src.core.config import get_settings - settings = get_settings() + import re - api_key = getattr(settings, 'CLAUDE_API_KEY', None) - if not api_key: - logger.warning("nemotron_chat_failed", error="CLAUDE_API_KEY not configured") - return None + OLLAMA_URL = "http://192.168.0.111:11434" + MODEL = "deepseek-r1:14b" try: - async with httpx.AsyncClient(timeout=30.0) as client: + async with httpx.AsyncClient(timeout=120.0) as client: resp = await client.post( - "https://api.anthropic.com/v1/messages", - headers={ - "x-api-key": api_key, - "anthropic-version": "2023-06-01", - "content-type": "application/json", - }, + f"{OLLAMA_URL}/api/chat", json={ - "model": "claude-haiku-4-5-20251001", - "max_tokens": 300, - "system": system_prompt, - "messages": [{"role": "user", "content": user_message}], + "model": MODEL, + "stream": False, + "messages": [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_message}, + ], + "options": {"num_predict": 400}, }, ) resp.raise_for_status() data = resp.json() - text = data["content"][0]["text"].strip() + raw = data.get("message", {}).get("content", "").strip() - # Token/費用統計 — Claude Haiku 4.5: Input $0.80/1M, Output $4.00/1M - usage = data.get("usage", {}) - in_tok = usage.get("input_tokens", 0) - out_tok = usage.get("output_tokens", 0) - cost = (in_tok * 0.0000008) + (out_tok * 0.000004) + # 過濾 deepseek-r1 的 <think>...</think> 
推理區塊 + text = re.sub(r"<think>.*?</think>", "", raw, flags=re.DOTALL).strip() + if not text: + text = raw # 萬一全是 think block,直接回傳原文 - # 月累計到 Redis - from src.core.redis_client import get_redis - from src.utils.timezone import now_taipei - redis = get_redis() - month_key = f"claude_cost:{now_taipei().strftime('%Y-%m')}" - try: - current = float(await redis.get(month_key) or 0) - new_total = current + cost - await redis.set(month_key, str(round(new_total, 6)), ex=40 * 24 * 3600) - except Exception: - new_total = cost + eval_count = data.get("eval_count", 0) + prompt_eval_count = data.get("prompt_eval_count", 0) + total_tokens = eval_count + prompt_eval_count - logger.info("nemotron_claude_usage", in_tokens=in_tok, out_tokens=out_tok, - cost_usd=round(cost, 6), monthly_total_usd=round(new_total, 4)) + logger.info("nemotron_ollama_usage", model=MODEL, + prompt_tokens=prompt_eval_count, output_tokens=eval_count) - return f"{text}\n\n📊 {in_tok+out_tok} tokens | ${cost:.4f} | 本月累計 ${new_total:.4f}" + return f"{text}\n\n🦙 {MODEL} | {total_tokens} tokens | 免費" except Exception as e: - logger.warning("nemotron_chat_failed", error=str(e)) + logger.warning("nemotron_chat_failed", model=MODEL, error=str(e)) return None async def generate_response(