fix(nvidia): upgrade to meta/llama-3.1-8b-instruct (128k context) to avoid 400 bad request on API
All checks were successful
E2E Health Check / e2e-health (push) Successful in 17s
All checks were successful
E2E Health Check / e2e-health (push) Successful in 17s
This commit is contained in:
@@ -105,15 +105,15 @@
|
||||
},
|
||||
|
||||
"nvidia": {
|
||||
"name": "NVIDIA Nemotron (ADR-036)",
|
||||
"name": "NVIDIA NIM (Llama 3.1 8B / ADR-036)",
|
||||
"enabled": true,
|
||||
"priority": 4,
|
||||
"endpoint": "https://integrate.api.nvidia.com/v1",
|
||||
"api_path": "/chat/completions",
|
||||
"models": {
|
||||
"default": "nvidia/nemotron-mini-4b-instruct",
|
||||
"tool_calling": "nvidia/nemotron-mini-4b-instruct",
|
||||
"rca": "nvidia/nemotron-mini-4b-instruct"
|
||||
"default": "meta/llama-3.1-8b-instruct",
|
||||
"tool_calling": "meta/llama-3.1-8b-instruct",
|
||||
"rca": "meta/llama-3.1-8b-instruct"
|
||||
},
|
||||
"options": {
|
||||
"temperature": 0.0,
|
||||
|
||||
@@ -144,8 +144,8 @@ class ModelRegistry:
|
||||
# 2026-03-29 ogt: P2-3 加入 NVIDIA (ADR-036)
|
||||
"nvidia": {
|
||||
"models": {
|
||||
"default": "nvidia/nemotron-mini-4b-instruct",
|
||||
"tool_calling": "nvidia/nemotron-mini-4b-instruct",
|
||||
"default": "meta/llama-3.1-8b-instruct",
|
||||
"tool_calling": "meta/llama-3.1-8b-instruct",
|
||||
}
|
||||
},
|
||||
},
|
||||
|
||||
@@ -114,8 +114,8 @@ class INvidiaProvider(Protocol):
|
||||
# NVIDIA NIM API Endpoint
|
||||
NVIDIA_API_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
|
||||
|
||||
# 預設模型 (2026-03-29 ogt: 修正為可用的 mini 模型)
|
||||
NVIDIA_DEFAULT_MODEL = "nvidia/nemotron-mini-4b-instruct"
|
||||
# 預設模型 (2026-03-31 ogt: 修正為 128k context 版的 Llama 3.1)
|
||||
NVIDIA_DEFAULT_MODEL = "meta/llama-3.1-8b-instruct"
|
||||
|
||||
# 請求超時 (秒) - Nemotron 延遲 11-45s（此為舊模型的量測值；改用 meta/llama-3.1-8b-instruct 後需重新確認）
|
||||
NVIDIA_TIMEOUT = 60.0
|
||||
|
||||
Reference in New Issue
Block a user