Source code for veupath_chatbot.ai.agents.factory

"""Factory helpers for constructing the agent and its engine."""

from typing import cast
from uuid import UUID

from kani import ChatMessage
from kani.engines.base import BaseEngine
from kani.engines.openai import OpenAIEngine

from veupath_chatbot.ai.engines.responses_openai import ResponsesOpenAIEngine
from veupath_chatbot.ai.models.catalog import (
    ModelProvider,
    ReasoningEffort,
    build_reasoning_hyperparams,
    get_model_entry,
)
from veupath_chatbot.platform.config import get_settings
from veupath_chatbot.platform.types import JSONObject

from .executor import PathfinderAgent


def _create_openai_engine(
    *,
    model: str,
    temperature: float,
    top_p: float,
    hyperparams: JSONObject,
    seed: int | None = None,
    max_context_size: int | None = None,
) -> BaseEngine:
    settings = get_settings()
    # Some OpenAI models only support default sampling params (temperature=1, top_p=1).
    # To avoid hard failures, we omit temperature/top_p entirely for those models.
    sampling_restricted = model.startswith(("gpt-5", "o1", "o3", "o4"))
    kwargs: dict[str, object] = {}
    if not sampling_restricted:
        kwargs["temperature"] = temperature
        kwargs["top_p"] = top_p
    if seed is not None:
        kwargs["seed"] = seed
    if max_context_size is not None:
        kwargs["max_context_size"] = max_context_size
    return ResponsesOpenAIEngine(
        api_key=settings.openai_api_key,
        model=model,
        **kwargs,
        **(hyperparams or {}),
    )


def _create_anthropic_engine(
    *,
    model: str,
    temperature: float,
    top_p: float,
    hyperparams: JSONObject,
    max_context_size: int | None = None,
) -> BaseEngine:
    settings = get_settings()
    from veupath_chatbot.ai.engines.cached_anthropic import CachedAnthropicEngine

    kwargs: dict[str, object] = {}
    if max_context_size is not None:
        kwargs["max_context_size"] = max_context_size

    # Anthropic extended thinking constraints:
    # - temperature must be 1 when thinking is enabled
    # - top_p/top_k must not be set when thinking is enabled
    # - budget_tokens must be < max_tokens
    thinking_cfg = (hyperparams or {}).get("thinking")
    thinking_enabled = isinstance(thinking_cfg, dict) and thinking_cfg.get("type") in (
        "enabled",
        "adaptive",
    )
    if thinking_enabled and isinstance(thinking_cfg, dict):
        temperature = 1.0
        # Don't pass top_p — Anthropic rejects it with thinking enabled.
        raw_budget = thinking_cfg.get("budget_tokens", 0)
        budget = int(raw_budget) if isinstance(raw_budget, (int, float)) else 0
        # max_tokens must exceed budget_tokens; default kani value (2048) is
        # too low for medium/high budgets.  Set to budget + 16 384 output room.
        if budget >= 2048:
            kwargs["max_tokens"] = budget + 16_384
    else:
        kwargs["top_p"] = top_p

    return CachedAnthropicEngine(
        api_key=settings.anthropic_api_key,
        model=model,
        temperature=temperature,
        **kwargs,
        **(hyperparams or {}),
    )


def _create_google_engine(
    *,
    model: str,
    temperature: float,
    top_p: float,
    hyperparams: JSONObject,
    max_context_size: int | None = None,
) -> BaseEngine:
    settings = get_settings()
    from kani.engines.google import GoogleAIEngine

    kwargs: dict[str, object] = {}
    if max_context_size is not None:
        kwargs["max_context_size"] = max_context_size
    return GoogleAIEngine(
        api_key=settings.gemini_api_key,
        model=model,
        temperature=temperature,
        top_p=top_p,
        **kwargs,
        **(hyperparams or {}),
    )


def _create_ollama_engine(
    *,
    model: str,
    temperature: float,
    top_p: float,
    hyperparams: JSONObject,
    max_context_size: int | None = None,
) -> BaseEngine:
    settings = get_settings()
    kwargs: dict[str, object] = {}
    if max_context_size is not None:
        kwargs["max_context_size"] = max_context_size
    return OpenAIEngine(
        api_key="ollama",
        model=model,
        api_base=settings.ollama_base_url,
        temperature=temperature,
        top_p=top_p,
        **kwargs,
        **(hyperparams or {}),
    )


[docs] def resolve_effective_model_id( *, model_override: str | None = None, persisted_model_id: str | None = None, ) -> str: """Resolve effective model ID from override and persisted state. Priority: per-request override > persisted per-conversation > server default. """ return model_override or persisted_model_id or get_settings().default_model_id
def _resolve_model_config( *, provider_override: ModelProvider | None = None, model_override: str | None = None, reasoning_effort: ReasoningEffort | None = None, reasoning_budget: int | None = None, ) -> tuple[ModelProvider, str, float, float, JSONObject]: """Resolve provider, model, and hyperparams from overrides and defaults.""" settings = get_settings() default_entry = get_model_entry(settings.default_model_id) if default_entry: provider: ModelProvider = default_entry.provider model = default_entry.model else: provider = "openai" model = settings.openai_model temperature = 0.0 top_p = 1.0 if model_override: entry = get_model_entry(model_override) if entry: provider = entry.provider model = entry.model else: model = model_override if provider_override: provider = provider_override resolved_entry = get_model_entry(model_override or settings.default_model_id) supports_reasoning = resolved_entry.supports_reasoning if resolved_entry else False effective_effort: ReasoningEffort | None = None if supports_reasoning: effective_effort = reasoning_effort if effective_effort is None: effective_effort = settings.default_reasoning_effort merged: dict[str, object] = {} reasoning_hp = build_reasoning_hyperparams( provider, effective_effort, budget_override=reasoning_budget ) merged.update(reasoning_hp) return provider, model, temperature, top_p, cast(JSONObject, merged) def _resolve_context_size( model_override: str | None, context_size_override: int | None, ) -> int | None: """Resolve context size: per-request override > catalog > engine default.""" if context_size_override is not None and context_size_override > 0: return context_size_override entry = get_model_entry(model_override or get_settings().default_model_id) if entry and entry.context_size > 0: return entry.context_size return None
[docs] def create_engine( *, provider_override: ModelProvider | None = None, model_override: str | None = None, reasoning_effort: ReasoningEffort | None = None, temperature: float | None = None, seed: int | None = None, context_size: int | None = None, reasoning_budget: int | None = None, site_id: str = "plasmodb", ) -> BaseEngine: """Create an LLM engine, optionally overridden per-request.""" provider, model, resolved_temperature, top_p, hyperparams = _resolve_model_config( provider_override=provider_override, model_override=model_override, reasoning_effort=reasoning_effort, reasoning_budget=reasoning_budget, ) effective_temperature = ( temperature if temperature is not None else resolved_temperature ) max_ctx = _resolve_context_size(model_override, context_size) if provider == "openai": return _create_openai_engine( model=model, temperature=effective_temperature, top_p=top_p, hyperparams=hyperparams, seed=seed, max_context_size=max_ctx, ) if provider == "anthropic": return _create_anthropic_engine( model=model, temperature=effective_temperature, top_p=top_p, hyperparams=hyperparams, max_context_size=max_ctx, ) if provider == "google": return _create_google_engine( model=model, temperature=effective_temperature, top_p=top_p, hyperparams=hyperparams, max_context_size=max_ctx, ) if provider == "ollama": return _create_ollama_engine( model=model, temperature=effective_temperature, top_p=top_p, hyperparams=hyperparams, max_context_size=max_ctx, ) if provider == "mock": from veupath_chatbot.ai.engines.mock import MockEngine return MockEngine(site_id=site_id) raise ValueError(f"Unknown provider: {provider!r}")
[docs] def create_agent( site_id: str, user_id: UUID | None = None, chat_history: list[ChatMessage] | None = None, strategy_graph: JSONObject | None = None, selected_nodes: JSONObject | None = None, *, provider_override: ModelProvider | None = None, model_override: str | None = None, reasoning_effort: ReasoningEffort | None = None, mentioned_context: str | None = None, disable_rag: bool = False, temperature: float | None = None, seed: int | None = None, context_size: int | None = None, response_tokens: int | None = None, reasoning_budget: int | None = None, ) -> PathfinderAgent: """Create a unified Pathfinder agent instance.""" engine = create_engine( provider_override=provider_override, model_override=model_override, reasoning_effort=reasoning_effort, temperature=temperature, seed=seed, context_size=context_size, reasoning_budget=reasoning_budget, site_id=site_id, ) return PathfinderAgent( engine=engine, site_id=site_id, user_id=user_id, chat_history=chat_history, strategy_graph=strategy_graph, selected_nodes=selected_nodes, mentioned_context=mentioned_context, disable_rag=disable_rag, desired_response_tokens=response_tokens, )