# Source code for veupath_chatbot.ai.engines.cached_anthropic

"""Anthropic engine with prompt caching and thinking-block fixes."""

import json
import warnings
from typing import Any

from kani import AIFunction, ChatMessage
from kani.engines.anthropic import AnthropicEngine
from kani.engines.anthropic.parts import AnthropicThinkingPart, AnthropicUnknownPart
from kani.engines.base import Completion
from kani.models import FunctionCall, MessagePart, ToolCall



# [docs]
class CachedAnthropicEngine(AnthropicEngine):
    """Anthropic engine variant with system-prompt caching and safe thinking blocks.

    Two behaviours differ from the parent engine:

    - The system prompt is sent as a text block tagged with an ephemeral
      ``cache_control`` marker, opting it into Anthropic prompt caching
      (noted by the original author as reducing cache-hit costs by 90%).
    - Response content is collapsed to a bare value only when it is a single
      string; a lone ``MessagePart`` (for example a thinking block) stays
      inside a list so ``ChatMessage`` validation does not reject it.
    """

    @staticmethod
    def _prepare_request(
        messages: list[ChatMessage],
        functions: list[AIFunction],
        *,
        intent: str = "create",
    ) -> tuple[dict[str, Any], list[dict[str, Any]]]:
        """Build the request through the parent engine, then tag the system prompt.

        Returns the parent's ``(kwargs, translated_messages)`` pair.  The
        ``"system"`` entry of ``kwargs`` is rewritten only when it is a
        non-empty string; any other value is passed through untouched.
        """
        request_kwargs, prompt_messages = AnthropicEngine._prepare_request(
            messages, functions, intent=intent
        )

        system_prompt = request_kwargs.get("system")
        if not isinstance(system_prompt, str) or not system_prompt:
            # Nothing to cache: missing, empty, or not a plain string.
            return request_kwargs, prompt_messages

        # Replace the plain string with a one-element list of text blocks so
        # the cache_control marker can be attached to it.
        cached_block = {
            "type": "text",
            "text": system_prompt,
            "cache_control": {"type": "ephemeral"},
        }
        request_kwargs["system"] = [cached_block]
        return request_kwargs, prompt_messages

    def _translate_anthropic_message(self, message: Any) -> Completion:
        """Convert an Anthropic response message into a kani ``Completion``.

        Mirrors the upstream translation, which (per the original note here)
        uses ``content = parts[0] if len(parts) == 1 else parts``.  That yields
        a bare ``MessagePart`` when the response is a single thinking block,
        and ``ChatMessage.content`` only accepts ``str | list[...] | None``,
        so Pydantic validation fails.  This version unwraps the single part
        only when it is a plain string.

        Text blocks become strings, ``tool_use`` blocks become tool calls,
        ``thinking`` blocks become ``AnthropicThinkingPart``; any other block
        type is kept as an ``AnthropicUnknownPart`` and a warning is emitted.
        """
        collected: list[str | MessagePart] = []
        calls: list[ToolCall] = []

        for block in message.content:
            kind = block.type

            if kind == "text":
                collected.append(block.text)
                continue

            if kind == "tool_use":
                # Tool input is re-serialised to JSON for FunctionCall.arguments.
                call = FunctionCall(
                    name=block.name, arguments=json.dumps(block.input)
                )
                calls.append(ToolCall(id=block.id, type="function", function=call))
                continue

            if kind == "thinking":
                collected.append(
                    AnthropicThinkingPart(
                        content=block.thinking, signature=block.signature
                    )
                )
                continue

            # Unrecognised block: keep its dumped data, then warn the caller.
            collected.append(
                AnthropicUnknownPart(type=kind, data=block.model_dump())
            )
            warnings.warn(
                f"Unknown Anthropic content block type: {kind}",
                stacklevel=2,
            )

        # Unwrap only a lone string; a lone MessagePart must stay in a list or
        # ChatMessage.content validation rejects it.
        sole_text = len(collected) == 1 and isinstance(collected[0], str)
        content: str | list[str | MessagePart] = (
            collected[0] if sole_text else collected
        )

        reply = ChatMessage.assistant(content, tool_calls=calls or None)

        # Record the reported output token count for the message and keep the
        # raw Anthropic message alongside it.
        self.set_cached_message_len(reply, message.usage.output_tokens)
        reply.extra["anthropic_message"] = message

        return Completion(
            message=reply,
            prompt_tokens=message.usage.input_tokens,
            completion_tokens=message.usage.output_tokens,
        )