#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
services/anthropic_service.py
Operation Ollama-First v5.0 / Phase 7 — Anthropic Claude API wrapper.

Model reference (2026-05 market):
- claude-opus-4-7: #1 for code (Arena Elo 1548), 200K context, $15/$75 per 1M tokens
- claude-sonnet-4-6: balanced agentic model, 200K context, $3/$15 per 1M tokens
- claude-haiku-4-5: lightweight and fast, 200K context, $0.8/$4 per 1M tokens

Design principles:
1. Thin SDK wrapper; the interface mirrors services/gemini_service.py
   (generate / check_connection).
2. Automatic prompt caching (5-minute ephemeral TTL; a repeated system_prompt
   saves roughly 90% of its cost).
3. Full usage reporting: input_tokens / output_tokens /
   cache_creation_input_tokens / cache_read_input_tokens.
4. The main flow never blows up: failures come back as
   ClaudeResponse(success=False) and the caller picks the fallback path.
5. Feature flags are the caller's job (e.g. CODE_REVIEW_USE_CLAUDE for
   code_review); this module is a pure SDK wrapper.
"""
from __future__ import annotations

import logging
import os
import time
from dataclasses import dataclass
from typing import Optional

logger = logging.getLogger(__name__)


def _env_int(name: str, default: int) -> int:
    """Return env var *name* parsed as int, or *default* if unset/blank/invalid.

    A malformed value is logged and ignored rather than raised, so a bad
    deployment setting cannot crash module import or a request.
    """
    raw = os.getenv(name)
    if raw is None or not raw.strip():
        return default
    try:
        return int(raw)
    except ValueError:
        logger.warning(
            "[Anthropic] invalid integer for %s=%r; using %d", name, raw, default
        )
        return default


def _usage_count(usage: object, field: str) -> int:
    """Read one integer counter from an SDK usage object; 0 if missing/None."""
    return int(getattr(usage, field, 0) or 0) if usage else 0


# Environment configuration.
# These module-level names are import-time snapshots kept for backward
# compatibility with code that imports them. AnthropicService re-reads the
# environment when it is constructed / called, so a value changed after import
# (deployment switch, late dotenv load) is still honoured; the snapshots are
# only the fallback when the variable is unset at that moment.
ANTHROPIC_API_KEY = os.getenv('ANTHROPIC_API_KEY', '')
DEFAULT_MODEL = os.getenv('CLAUDE_MODEL', 'claude-opus-4-7')
TIMEOUT = _env_int('CLAUDE_TIMEOUT', 120)


@dataclass
class ClaudeResponse:
    """Result of one Claude API call (styled after GeminiResponse)."""

    success: bool
    content: str
    model: str
    input_tokens: int = 0
    output_tokens: int = 0
    cache_creation_tokens: int = 0
    cache_read_tokens: int = 0
    duration_ms: int = 0
    error: Optional[str] = None

    @property
    def cache_hit(self) -> bool:
        """True when the prompt cache was hit (cache_read_tokens > 0)."""
        return self.cache_read_tokens > 0

    @property
    def total_tokens(self) -> int:
        """Sum of input_tokens and output_tokens (cache counters not included)."""
        return self.input_tokens + self.output_tokens


class AnthropicService:
    """Claude API wrapper with prompt-cache support and structured usage."""

    def __init__(self, model: Optional[str] = None, api_key: Optional[str] = None):
        """
        Args:
            model: Default model. When omitted, the CLAUDE_MODEL env var is
                read at construction time, falling back to DEFAULT_MODEL.
            api_key: API key. When omitted, the ANTHROPIC_API_KEY env var is
                read at construction time, falling back to ANTHROPIC_API_KEY.
        """
        # Explicit argument > current environment > import-time snapshot.
        self.model = model or os.getenv('CLAUDE_MODEL') or DEFAULT_MODEL
        self.api_key = api_key or os.getenv('ANTHROPIC_API_KEY') or ANTHROPIC_API_KEY
        self._client = None
        self._init_client()

    def _init_client(self) -> None:
        """Create the SDK client.

        Degrades quietly (``_client`` stays None, so is_available() is False)
        when there is no API key, the SDK is not installed, or construction
        fails for any other reason.
        """
        if not self.api_key:
            logger.info("[Anthropic] ANTHROPIC_API_KEY 未設定,service 不可用")
            return
        try:
            # Imported lazily so the module stays importable without the SDK.
            import anthropic
            self._client = anthropic.Anthropic(api_key=self.api_key)
            logger.info("[Anthropic] SDK 初始化成功,預設模型=%s", self.model)
        except ImportError:
            logger.error("[Anthropic] SDK 未安裝(pip install anthropic>=0.40.0)")
        except Exception as e:
            logger.error("[Anthropic] SDK 初始化失敗: %s", e)

    def is_available(self) -> bool:
        """Return whether the service is ready to be called.

        False when the client was never initialised. Phase 23 (2026-05-04)
        cost_throttle integration: also False when
        ``is_provider_throttled('claude')`` reports the provider as throttled,
        so callers can fall back to Gemini without sending a Claude request.
        If the throttle check itself fails, the failure is logged and the
        service is still reported as available.
        """
        if self._client is None:
            return False
        try:
            from services.cost_throttle_service import is_provider_throttled
            if is_provider_throttled('claude'):
                logger.info("[Anthropic] is_available()=False — cost throttled, caller 應 fallback Gemini")
                return False
        except Exception:
            # Best effort: a broken/missing throttle service must not disable Claude.
            logger.warning("[Anthropic] cost_throttle check failed; continuing as available", exc_info=True)
        return True

    def generate(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        model: Optional[str] = None,
        max_tokens: int = 4096,
        temperature: float = 0.3,
        cache_system: bool = True,
        timeout: Optional[int] = None,
    ) -> ClaudeResponse:
        """
        Call the Claude Messages API and return the generated text.

        Args:
            prompt: User prompt, sent as a single user message.
            system_prompt: System instruction. Keep it stable and combine with
                cache_system=True to benefit from prompt caching.
            model: Model name; defaults to self.model.
            max_tokens: Output token limit.
            temperature: Sampling temperature (0.0-1.0; ~0.2 suggested for
                code review, ~0.7 for copywriting).
            cache_system: When True, system_prompt is sent as a text block
                tagged with an ephemeral cache_control.
            timeout: Per-request timeout in seconds. When None, CLAUDE_TIMEOUT
                is read from the environment at call time, falling back to
                TIMEOUT if unset or malformed.

        Returns:
            ClaudeResponse: always a dataclass. On failure success=False and
            error carries the message; this method does not raise.
        """
        model_name = model or self.model
        request_timeout = (
            timeout if timeout is not None else _env_int('CLAUDE_TIMEOUT', TIMEOUT)
        )
        start = time.monotonic()

        if not self._client:
            return ClaudeResponse(
                success=False,
                content="",
                model=model_name,
                error="anthropic client not initialized (check ANTHROPIC_API_KEY)",
            )

        try:
            kwargs = {
                "model": model_name,
                "max_tokens": max_tokens,
                "temperature": temperature,
                "messages": [{"role": "user", "content": prompt}],
                "timeout": request_timeout,
            }
            if system_prompt:
                if cache_system:
                    # Block form is required to attach cache_control to the system text.
                    kwargs["system"] = [
                        {
                            "type": "text",
                            "text": system_prompt,
                            "cache_control": {"type": "ephemeral"},
                        }
                    ]
                else:
                    kwargs["system"] = system_prompt

            resp = self._client.messages.create(**kwargs)

            # Join the text of every content block; blocks without text are skipped.
            block_texts = (getattr(block, 'text', None) for block in (resp.content or []))
            content = "\n".join(text for text in block_texts if text)

            usage = getattr(resp, 'usage', None)
            input_tokens = _usage_count(usage, 'input_tokens')
            output_tokens = _usage_count(usage, 'output_tokens')
            cache_creation = _usage_count(usage, 'cache_creation_input_tokens')
            cache_read = _usage_count(usage, 'cache_read_input_tokens')
            duration_ms = int((time.monotonic() - start) * 1000)

            logger.info(
                "[Anthropic] generate ok model=%s tokens=%d/%d cache=%d/%d duration=%dms",
                model_name, input_tokens, output_tokens, cache_creation, cache_read, duration_ms,
            )
            return ClaudeResponse(
                success=True,
                content=content,
                # Prefer the model name echoed by the API when present.
                model=getattr(resp, 'model', model_name) or model_name,
                input_tokens=input_tokens,
                output_tokens=output_tokens,
                cache_creation_tokens=cache_creation,
                cache_read_tokens=cache_read,
                duration_ms=duration_ms,
            )
        except Exception as e:
            # Boundary handler: any SDK/network/parsing error becomes a failed response.
            duration_ms = int((time.monotonic() - start) * 1000)
            err_msg = f"{type(e).__name__}: {str(e)[:300]}"
            logger.warning("[Anthropic] generate failed model=%s duration=%dms err=%s",
                           model_name, duration_ms, err_msg)
            return ClaudeResponse(
                success=False,
                content="",
                model=model_name,
                duration_ms=duration_ms,
                error=err_msg,
            )

    def check_connection(self) -> bool:
        """Lightweight connectivity probe.

        Sends a tiny "ping" request (max_tokens=10, 15 s timeout, no system
        prompt) and returns whether it succeeded. Returns False without
        raising when the client is unavailable or the probe fails.
        """
        if not self._client:
            return False
        try:
            r = self.generate(
                prompt="ping",
                max_tokens=10,
                temperature=0.0,
                cache_system=False,
                timeout=15,
            )
            return r.success
        except Exception as e:
            logger.warning("[Anthropic] check_connection failed: %s", e)
            return False


# Module-level singleton (same pattern as gemini_service).
anthropic_service = AnthropicService()


if __name__ == "__main__":
    # Manual smoke test (requires ANTHROPIC_API_KEY to be set).
    logging.basicConfig(level=logging.INFO)
    svc = AnthropicService()
    print(f"is_available: {svc.is_available()}")
    if svc.is_available():
        r = svc.generate(
            prompt="用一句話介紹 Python。",
            system_prompt="你是繁體中文助手。",
            max_tokens=100,
        )
        print(f"success={r.success} tokens={r.input_tokens}/{r.output_tokens} "
              f"cache={r.cache_creation_tokens}/{r.cache_read_tokens} duration={r.duration_ms}ms")
        print(r.content if r.success else r.error)