From fbf122fa1f7a0d69b9800ff75ef1475246ad33e2 Mon Sep 17 00:00:00 2001 From: OG T Date: Fri, 3 Apr 2026 18:41:15 +0800 Subject: [PATCH] =?UTF-8?q?fix(chat):=20OpenClaw=20=E6=94=B9=E7=94=A8=20NI?= =?UTF-8?q?M=20llama-3.1-8b=20=E5=B0=8D=E8=A9=B1=20+=20NemoClaw=20timeout?= =?UTF-8?q?=20120s=20+=20=E8=80=81=E9=97=86=E7=A8=B1=E8=AC=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. _call_openclaw: 改用 NIM meta/llama-3.1-8b-instruct 舊的 analyze/incident 是告警 API,回覆是告警格式,不適合對話 2. _call_nemotron: 移除 Ollama fallback,回到純 NIM 3. NEMOTRON_TIMEOUT_SECONDS: 55 → 120 (ConfigMap 已更新) 4. 修正「統帥」→「老闆」 Co-Authored-By: Claude Sonnet 4.6 --- apps/api/src/services/chat_manager.py | 82 +++++++-------------------- 1 file changed, 20 insertions(+), 62 deletions(-) diff --git a/apps/api/src/services/chat_manager.py b/apps/api/src/services/chat_manager.py index fb73f174a..922313d28 100644 --- a/apps/api/src/services/chat_manager.py +++ b/apps/api/src/services/chat_manager.py @@ -78,52 +78,35 @@ class ChatManager: async def _call_openclaw(self, system_prompt: str, user_message: str) -> str | None: """ - 呼叫 OpenClaw 對話 — 走 Ollama qwen2.5:7b-instruct (192.168.0.188:11434) + 呼叫 OpenClaw 對話 — 走 NVIDIA NIM meta/llama-3.1-8b-instruct 2026-04-03 ogt: OpenClaw 8088 的 analyze/incident 是告警分析 API, - 不適合做自然語言對話(回覆會是告警格式)。 - 改用 Ollama 本地模型做 chat,速度快、無格式污染。 + 回覆是告警格式,不適合自然語言對話。 + 改用 NIM llama-3.1-8b 做 chat,與 NemoClaw 同樣走免費 NIM cloud。 """ - import httpx - from src.core.config import get_settings - settings = get_settings() - - ollama_url = getattr(settings, 'OLLAMA_URL', 'http://192.168.0.188:11434') - openclaw_timeout = float(getattr(settings, 'OPENCLAW_TIMEOUT', 40.0)) + from src.services.nvidia_provider import get_nvidia_provider + nvidia = get_nvidia_provider() try: - async with httpx.AsyncClient(timeout=openclaw_timeout) as client: - resp = await client.post( - f"{ollama_url}/api/chat", - json={ - "model": "qwen2.5:7b-instruct", - "stream": False, - "messages": [ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": user_message}, - ], - "options": {"num_predict": 300}, - }, - ) - resp.raise_for_status() - data = resp.json() - return data.get("message", {}).get("content", "").strip() or None + full_prompt = f"{system_prompt}\n\n用戶訊息: {user_message}" + response, success, _, _ = await nvidia.chat( + prompt=full_prompt, + model="meta/llama-3.1-8b-instruct", + max_tokens=300, + ) + if success and response and "not configured" not in response and "Circuit Breaker" not in response: + return response.strip() + return None except Exception as e: logger.warning("openclaw_chat_failed", error=str(e)) return None async def _call_nemotron(self, system_prompt: str, user_message: str) -> str | None: """ - 呼叫 NemoClaw — NIM 優先,超時則 fallback 到 Ollama llama3.2:3b + 呼叫 NVIDIA NIM nemotron-mini-4b (NemoClaw) - 2026-04-03 ogt: NIM 免費 tier 延遲 11-45s 且常超時, - 加 Ollama fallback 確保 NemoClaw 一定有回應。 + NIM 免費 tier 延遲 11-45s,此方法可能需要 30-120s 才回應 """ - import httpx - from src.core.config import get_settings as _get_settings from src.services.nvidia_provider import get_nvidia_provider - settings = _get_settings() - - # 優先嘗試 NIM (timeout 20s,快速失敗) nvidia = get_nvidia_provider() try: full_prompt = f"{system_prompt}\n\n用戶訊息: {user_message}" @@ -134,35 +117,10 @@ class ChatManager: ) if success and response and "not configured" not in response and "Circuit Breaker" not in response: return response.strip() + return None except Exception as e: - logger.warning("nemotron_nim_failed_fallback_ollama", error=str(e)) - - # Fallback: Ollama llama3.2:3b (本地,速度快) - ollama_url = getattr(settings, 'OLLAMA_URL', 'http://192.168.0.188:11434') - try: - async with httpx.AsyncClient(timeout=30.0) as client: - resp = await client.post( - f"{ollama_url}/api/chat", - json={ - "model": "llama3.2:3b", - "stream": False, - "messages": [ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": user_message}, - ], - "options": {"num_predict": 250}, - }, - ) - resp.raise_for_status() - data = resp.json() - result = data.get("message", {}).get("content", "").strip() - if result: - logger.info("nemotron_ollama_fallback_used") - return result - except Exception as e: - logger.warning("nemotron_ollama_fallback_failed", error=str(e)) - - return None + logger.warning("nemotron_chat_failed", error=str(e)) + return None async def generate_response( self, @@ -199,7 +157,7 @@ class ChatManager: nemo_task = asyncio.create_task( self._call_nemotron( f"{NEMOCLAW_PERSONA}\n{context}", - f"統帥問了: {text}\n\n請從 NemoClaw 角度補充或評論。", + f"老闆問了: {text}\n\n請從 NemoClaw 角度補充或評論。", ) )