fix(nvidia): guard Langfuse generation calls (context check + try/except) to prevent NoneType AttributeError
All checks were successful
E2E Health Check / e2e-health (push) Successful in 17s
All checks were successful
E2E Health Check / e2e-health (push) Successful in 17s
This commit is contained in:
@@ -491,23 +491,27 @@ class NvidiaProvider:
|
||||
span.set_attribute("ai.total_tokens", total_tokens)
|
||||
|
||||
# P1-1: Langfuse generation logging
|
||||
langfuse_ctx.generation(
|
||||
name="nvidia_nemotron",
|
||||
model=model,
|
||||
input={"messages": messages, "tools": tool_names},
|
||||
output={
|
||||
"tool_calls": [
|
||||
{"name": tc.tool_name, "args": tc.arguments}
|
||||
for tc in tool_calls
|
||||
if tc.valid
|
||||
]
|
||||
},
|
||||
usage={"input": prompt_tokens, "output": completion_tokens},
|
||||
metadata={
|
||||
"latency_ms": round(latency_ms, 2),
|
||||
"valid_count": sum(1 for tc in tool_calls if tc.valid),
|
||||
},
|
||||
)
|
||||
if langfuse_ctx and hasattr(langfuse_ctx, "generation"):
|
||||
try:
|
||||
langfuse_ctx.generation(
|
||||
name="nvidia_nemotron",
|
||||
model=model,
|
||||
input={"messages": messages, "tools": tool_names},
|
||||
output={
|
||||
"tool_calls": [
|
||||
{"name": tc.tool_name, "args": tc.arguments}
|
||||
for tc in tool_calls
|
||||
if tc.valid
|
||||
]
|
||||
},
|
||||
usage={"input": prompt_tokens, "output": completion_tokens},
|
||||
metadata={
|
||||
"latency_ms": round(latency_ms, 2),
|
||||
"valid_count": sum(1 for tc in tool_calls if tc.valid),
|
||||
},
|
||||
)
|
||||
except Exception as le:
|
||||
logger.warning("langfuse_generation_failed_safe", error=str(le))
|
||||
|
||||
# P3-3: Prometheus success metrics
|
||||
for tc in tool_calls:
|
||||
@@ -750,17 +754,21 @@ class NvidiaProvider:
|
||||
NVIDIA_LATENCY_HISTOGRAM.observe(latency_ms / 1000)
|
||||
|
||||
# Langfuse
|
||||
langfuse_ctx.generation(
|
||||
name="nvidia_chat",
|
||||
model=model_name,
|
||||
input=prompt[:500],
|
||||
output=text[:500],
|
||||
metadata={
|
||||
"total_tokens": total_tokens,
|
||||
"cost_usd": cost_usd,
|
||||
"latency_ms": round(latency_ms, 2),
|
||||
},
|
||||
)
|
||||
if langfuse_ctx and hasattr(langfuse_ctx, "generation"):
|
||||
try:
|
||||
langfuse_ctx.generation(
|
||||
name="nvidia_chat",
|
||||
model=model_name,
|
||||
input=prompt[:500],
|
||||
output=text[:500],
|
||||
metadata={
|
||||
"total_tokens": total_tokens,
|
||||
"cost_usd": cost_usd,
|
||||
"latency_ms": round(latency_ms, 2),
|
||||
},
|
||||
)
|
||||
except Exception as le:
|
||||
logger.warning("langfuse_chat_generation_failed_safe", error=str(le))
|
||||
|
||||
logger.info(
|
||||
"nvidia_chat_response_received",
|
||||
@@ -797,7 +805,13 @@ class NvidiaProvider:
|
||||
except Exception as e:
|
||||
self._circuit_breaker.record_failure()
|
||||
NVIDIA_REQUESTS_TOTAL.labels(status="error", tool_name="chat").inc()
|
||||
logger.warning("nvidia_chat_failed", error=str(e), error_type=type(e).__name__)
|
||||
import traceback
|
||||
logger.warning(
|
||||
"nvidia_chat_failed",
|
||||
error=str(e),
|
||||
error_type=type(e).__name__,
|
||||
stacktrace=traceback.format_exc()
|
||||
)
|
||||
return str(e), False, 0, 0.0
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user