fix(nvidia): revert to nemotron-mini, truncate context for 4K limit, enforce precise confidence
All checks were successful
E2E Health Check / e2e-health (push) Successful in 17s
All checks were successful
E2E Health Check / e2e-health (push) Successful in 17s
This commit is contained in:
@@ -105,15 +105,15 @@
|
||||
},
|
||||
|
||||
"nvidia": {
|
||||
"name": "NVIDIA NIM (Llama 3.1 8B / ADR-036)",
|
||||
"name": "NVIDIA Nemotron (ADR-036)",
|
||||
"enabled": true,
|
||||
"priority": 4,
|
||||
"endpoint": "https://integrate.api.nvidia.com/v1",
|
||||
"api_path": "/chat/completions",
|
||||
"models": {
|
||||
"default": "meta/llama-3.1-8b-instruct",
|
||||
"tool_calling": "meta/llama-3.1-8b-instruct",
|
||||
"rca": "meta/llama-3.1-8b-instruct"
|
||||
"default": "nvidia/nemotron-mini-4b-instruct",
|
||||
"tool_calling": "nvidia/nemotron-mini-4b-instruct",
|
||||
"rca": "nvidia/nemotron-mini-4b-instruct"
|
||||
},
|
||||
"options": {
|
||||
"temperature": 0.0,
|
||||
|
||||
@@ -63,6 +63,7 @@ For each optimization suggestion, provide EXECUTABLE kubectl commands:
|
||||
## ⚠️ Output Rules
|
||||
- You MUST respond with ONLY valid JSON
|
||||
- confidence MUST be between 0.0 and 1.0
|
||||
- **CRITICAL**: The `confidence` score MUST be mathematically precise and varied (e.g., 0.82, 0.91, 0.77). Do NOT default to generic numbers ending in 5 or 0 like 0.75, 0.80, 0.85. Calculate it strictly based on data evidence.
|
||||
- If confidence < 0.70, set primary_responsibility to "COLLAB"
|
||||
- optimization_suggestions MUST contain executable kubectl commands
|
||||
- Each suggestion needs: type, description, kubectl_or_config (REQUIRED)
|
||||
|
||||
@@ -144,8 +144,8 @@ class ModelRegistry:
|
||||
# 2026-03-29 ogt: P2-3 加入 NVIDIA (ADR-036)
|
||||
"nvidia": {
|
||||
"models": {
|
||||
"default": "meta/llama-3.1-8b-instruct",
|
||||
"tool_calling": "meta/llama-3.1-8b-instruct",
|
||||
"default": "nvidia/nemotron-mini-4b-instruct",
|
||||
"tool_calling": "nvidia/nemotron-mini-4b-instruct",
|
||||
}
|
||||
},
|
||||
},
|
||||
|
||||
@@ -114,8 +114,8 @@ class INvidiaProvider(Protocol):
|
||||
# NVIDIA NIM API Endpoint
|
||||
NVIDIA_API_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
|
||||
|
||||
# 預設模型 (2026-03-31 ogt: 修正為 128k context 版的 Llama 3.1)
|
||||
NVIDIA_DEFAULT_MODEL = "meta/llama-3.1-8b-instruct"
|
||||
# 預設模型 (2026-03-31 ogt: 恢復為 nemotron-mini-4b-instruct)
|
||||
NVIDIA_DEFAULT_MODEL = "nvidia/nemotron-mini-4b-instruct"
|
||||
|
||||
# 請求超時 (秒) - Nemotron 延遲 11-45s
|
||||
NVIDIA_TIMEOUT = 60.0
|
||||
|
||||
@@ -1171,10 +1171,10 @@ Trace URL: {signoz_trace_url}
|
||||
- risk_level: 風險等級
|
||||
- reasoning: LLM 推理過程
|
||||
"""
|
||||
# 建構 prompt
|
||||
# 建構 prompt (2026-03-31 ogt: Nemotron-mini context 較小,限制數量與長度)
|
||||
signal_summary = "\n".join([
|
||||
f"- {s.get('alert_name', 'unknown')}: {s.get('description', 'N/A')}"
|
||||
for s in signals[:10] # 最多 10 筆
|
||||
f"- {s.get('alert_name', 'unknown')}: {str(s.get('description', 'N/A'))[:100]}..."
|
||||
for s in signals[:3] # 最多 3 筆,每筆最多 100 字元
|
||||
])
|
||||
|
||||
target = affected_services[0] if affected_services else "unknown-service"
|
||||
@@ -1199,8 +1199,10 @@ Trace URL: {signoz_trace_url}
|
||||
diagnosis_cmds = expert_context.get("suggested_diagnosis_commands", [])
|
||||
diagnosis_cmds_str = "\n".join([f" - `{cmd}`" for cmd in diagnosis_cmds]) if diagnosis_cmds else " - (無)"
|
||||
|
||||
# ADR-030: 加入完整診斷上下文 (如果有)
|
||||
full_diagnosis = expert_context.get("diagnosis_context", "")
|
||||
# ADR-030: 加入完整診斷上下文 (如果有),並限制長度以符合 4K Context
|
||||
full_diagnosis = str(expert_context.get("diagnosis_context", ""))[:800]
|
||||
if len(str(expert_context.get("diagnosis_context", ""))) > 800:
|
||||
full_diagnosis += "... (truncated)"
|
||||
diagnosis_signals = expert_context.get("diagnosis_signals", [])
|
||||
signals_summary = ""
|
||||
if diagnosis_signals:
|
||||
|
||||
Reference in New Issue
Block a user