From 22796c6aff5fc01abeca8af31499e1036ef93e5c Mon Sep 17 00:00:00 2001 From: OG T Date: Tue, 31 Mar 2026 13:49:49 +0800 Subject: [PATCH] fix(nvidia): upgrade to meta/llama-3.1-8b-instruct (128k context) to avoid 400 bad request on API --- apps/api/models.json | 8 ++++---- apps/api/src/services/model_registry.py | 4 ++-- apps/api/src/services/nvidia_provider.py | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/apps/api/models.json b/apps/api/models.json index 3e5425903..cb9f42b10 100644 --- a/apps/api/models.json +++ b/apps/api/models.json @@ -105,15 +105,15 @@ }, "nvidia": { - "name": "NVIDIA Nemotron (ADR-036)", + "name": "NVIDIA NIM (Llama 3.1 8B / ADR-036)", "enabled": true, "priority": 4, "endpoint": "https://integrate.api.nvidia.com/v1", "api_path": "/chat/completions", "models": { - "default": "nvidia/nemotron-mini-4b-instruct", - "tool_calling": "nvidia/nemotron-mini-4b-instruct", - "rca": "nvidia/nemotron-mini-4b-instruct" + "default": "meta/llama-3.1-8b-instruct", + "tool_calling": "meta/llama-3.1-8b-instruct", + "rca": "meta/llama-3.1-8b-instruct" }, "options": { "temperature": 0.0, diff --git a/apps/api/src/services/model_registry.py b/apps/api/src/services/model_registry.py index 813851139..c08e35727 100644 --- a/apps/api/src/services/model_registry.py +++ b/apps/api/src/services/model_registry.py @@ -144,8 +144,8 @@ class ModelRegistry: # 2026-03-29 ogt: P2-3 加入 NVIDIA (ADR-036) "nvidia": { "models": { - "default": "nvidia/nemotron-mini-4b-instruct", - "tool_calling": "nvidia/nemotron-mini-4b-instruct", + "default": "meta/llama-3.1-8b-instruct", + "tool_calling": "meta/llama-3.1-8b-instruct", } }, }, diff --git a/apps/api/src/services/nvidia_provider.py b/apps/api/src/services/nvidia_provider.py index 7603cd926..5736b85ec 100644 --- a/apps/api/src/services/nvidia_provider.py +++ b/apps/api/src/services/nvidia_provider.py @@ -114,8 +114,8 @@ class INvidiaProvider(Protocol): # NVIDIA NIM API Endpoint NVIDIA_API_URL = "https://integrate.api.nvidia.com/v1/chat/completions" -# 預設模型 (2026-03-29 ogt: 修正為可用的 mini 模型) -NVIDIA_DEFAULT_MODEL = "nvidia/nemotron-mini-4b-instruct" +# 預設模型 (2026-03-31 ogt: 修正為 128k context 版的 Llama 3.1) +NVIDIA_DEFAULT_MODEL = "meta/llama-3.1-8b-instruct" # 請求超時 (秒) - Nemotron 延遲 11-45s NVIDIA_TIMEOUT = 60.0