From 46843c8e1977d61744d2e2f4c80be54339ce6097 Mon Sep 17 00:00:00 2001 From: OG T Date: Tue, 31 Mar 2026 13:57:10 +0800 Subject: [PATCH] fix(nvidia): revert to nemotron-mini, truncate context for 4K limit, enforce precise confidence --- apps/api/models.json | 8 ++++---- apps/api/src/core/prompts.py | 1 + apps/api/src/services/model_registry.py | 4 ++-- apps/api/src/services/nvidia_provider.py | 4 ++-- apps/api/src/services/openclaw.py | 12 +++++++----- 5 files changed, 16 insertions(+), 13 deletions(-) diff --git a/apps/api/models.json b/apps/api/models.json index cb9f42b1..3e542590 100644 --- a/apps/api/models.json +++ b/apps/api/models.json @@ -105,15 +105,15 @@ }, "nvidia": { - "name": "NVIDIA NIM (Llama 3.1 8B / ADR-036)", + "name": "NVIDIA Nemotron (ADR-036)", "enabled": true, "priority": 4, "endpoint": "https://integrate.api.nvidia.com/v1", "api_path": "/chat/completions", "models": { - "default": "meta/llama-3.1-8b-instruct", - "tool_calling": "meta/llama-3.1-8b-instruct", - "rca": "meta/llama-3.1-8b-instruct" + "default": "nvidia/nemotron-mini-4b-instruct", + "tool_calling": "nvidia/nemotron-mini-4b-instruct", + "rca": "nvidia/nemotron-mini-4b-instruct" }, "options": { "temperature": 0.0, diff --git a/apps/api/src/core/prompts.py b/apps/api/src/core/prompts.py index 0dc3777a..b1a623ab 100644 --- a/apps/api/src/core/prompts.py +++ b/apps/api/src/core/prompts.py @@ -63,6 +63,7 @@ For each optimization suggestion, provide EXECUTABLE kubectl commands: ## ⚠️ Output Rules - You MUST respond with ONLY valid JSON - confidence MUST be between 0.0 and 1.0 +- **CRITICAL**: The `confidence` score MUST be mathematically precise and varied (e.g., 0.82, 0.91, 0.77). Do NOT default to generic numbers ending in 5 or 0 like 0.75, 0.80, 0.85. Calculate it strictly based on data evidence. - If confidence < 0.70, set primary_responsibility to "COLLAB" - optimization_suggestions MUST contain executable kubectl commands - Each suggestion needs: type, description, kubectl_or_config (REQUIRED) diff --git a/apps/api/src/services/model_registry.py b/apps/api/src/services/model_registry.py index c08e3572..81385113 100644 --- a/apps/api/src/services/model_registry.py +++ b/apps/api/src/services/model_registry.py @@ -144,8 +144,8 @@ class ModelRegistry: # 2026-03-29 ogt: P2-3 加入 NVIDIA (ADR-036) "nvidia": { "models": { - "default": "meta/llama-3.1-8b-instruct", - "tool_calling": "meta/llama-3.1-8b-instruct", + "default": "nvidia/nemotron-mini-4b-instruct", + "tool_calling": "nvidia/nemotron-mini-4b-instruct", } }, }, diff --git a/apps/api/src/services/nvidia_provider.py b/apps/api/src/services/nvidia_provider.py index 5736b85e..a6c611a4 100644 --- a/apps/api/src/services/nvidia_provider.py +++ b/apps/api/src/services/nvidia_provider.py @@ -114,8 +114,8 @@ class INvidiaProvider(Protocol): # NVIDIA NIM API Endpoint NVIDIA_API_URL = "https://integrate.api.nvidia.com/v1/chat/completions" -# 預設模型 (2026-03-31 ogt: 修正為 128k context 版的 Llama 3.1) -NVIDIA_DEFAULT_MODEL = "meta/llama-3.1-8b-instruct" +# 預設模型 (2026-03-31 ogt: 恢復為 nemotron-mini-4b-instruct) +NVIDIA_DEFAULT_MODEL = "nvidia/nemotron-mini-4b-instruct" # 請求超時 (秒) - Nemotron 延遲 11-45s NVIDIA_TIMEOUT = 60.0 diff --git a/apps/api/src/services/openclaw.py b/apps/api/src/services/openclaw.py index 8fae642c..93ea05df 100644 --- a/apps/api/src/services/openclaw.py +++ b/apps/api/src/services/openclaw.py @@ -1171,10 +1171,10 @@ Trace URL: {signoz_trace_url} - risk_level: 風險等級 - reasoning: LLM 推理過程 """ - # 建構 prompt + # 建構 prompt (2026-03-31 ogt: Nemotron-mini context 較小,限制數量與長度) signal_summary = "\n".join([ - f"- {s.get('alert_name', 'unknown')}: {s.get('description', 'N/A')}" - for s in signals[:10] # 最多 10 筆 + f"- {s.get('alert_name', 'unknown')}: {str(s.get('description', 'N/A'))[:100]}..." + for s in signals[:3] # 最多 3 筆,每筆最多 100 字元 ]) target = affected_services[0] if affected_services else "unknown-service" @@ -1199,8 +1199,10 @@ Trace URL: {signoz_trace_url} diagnosis_cmds = expert_context.get("suggested_diagnosis_commands", []) diagnosis_cmds_str = "\n".join([f" - `{cmd}`" for cmd in diagnosis_cmds]) if diagnosis_cmds else " - (無)" - # ADR-030: 加入完整診斷上下文 (如果有) - full_diagnosis = expert_context.get("diagnosis_context", "") + # ADR-030: 加入完整診斷上下文 (如果有),並限制長度以符合 4K Context + full_diagnosis = str(expert_context.get("diagnosis_context", ""))[:800] + if len(str(expert_context.get("diagnosis_context", ""))) > 800: + full_diagnosis += "... (truncated)" diagnosis_signals = expert_context.get("diagnosis_signals", []) signals_summary = "" if diagnosis_signals: