fix(nvidia): lower default max_tokens from 2048 to 1024 to fit nemotron-mini's 4096-token context length

2026-03-31 13:44:17 +08:00
parent f458d078df
commit 11627f25f0
1 changed file with 1 addition and 1 deletion
--- a/apps/api/src/services/nvidia_provider.py
+++ b/apps/api/src/services/nvidia_provider.py
@@ -655,7 +655,7 @@ class NvidiaProvider:
        prompt: str,
        model: str | None = None,
        temperature: float = 0.1,
-        max_tokens: int = 2048,
+        max_tokens: int = 1024,
    ) -> tuple[str, bool, int, float]:
        """
        一般對話 (非 Tool Calling) - 用於 RCA 分析