# Source code for veupath_chatbot.integrations.veupathdb.strategy_api.base

"""StrategyAPI base class with shared infrastructure.

Provides initialization, parameter normalization, and session management
that all mixin classes depend on.
"""

from veupath_chatbot.integrations.veupathdb.client import VEuPathDBClient
from veupath_chatbot.integrations.veupathdb.param_utils import normalize_param_value
from veupath_chatbot.integrations.veupathdb.strategy_api.helpers import (
    CURRENT_USER,
    resolve_wdk_user_id,
)
from veupath_chatbot.platform.logging import get_logger
from veupath_chatbot.platform.types import JSONObject, JSONValue

logger = get_logger(__name__)


def _sort_profile_pattern(pattern: str) -> str:
    """Sort ``%code:Y%code:N%`` entries alphabetically.

    OrthoMCL requires pattern entries in alphabetical order.  The WDK
    frontend always ``.sort()``s before joining — we must too.
    """
    if not pattern.startswith("%") or not pattern.endswith("%"):
        return pattern
    parts = [p for p in pattern.strip("%").split("%") if p]
    return f"%{'%'.join(sorted(parts))}%" if parts else pattern


class StrategyAPIBase:
    """Base infrastructure for :class:`StrategyAPI`.

    Provides ``__init__``, parameter normalization, and WDK session
    management. Mixin classes inherit from this to access shared state.
    """

    def __init__(self, client: VEuPathDBClient, user_id: str = CURRENT_USER) -> None:
        """Initialize the strategy API.

        :param client: VEuPathDB HTTP client (site-specific).
        :param user_id: WDK user ID; defaults to ``CURRENT_USER``, which
            :meth:`_ensure_session` replaces with a concrete id on first use.
        """
        self.client = client
        self.user_id = user_id
        # Set once _ensure_session() has run, so resolution happens only once.
        self._session_initialized = False
        # Per-instance caches; they are only created here, not populated.
        self._boolean_search_cache: dict[str, str] = {}
        self._answer_param_cache: dict[str, set[str]] = {}

    def _normalize_parameters(
        self,
        parameters: JSONObject,
        *,
        keep_empty: set[str] | None = None,
    ) -> dict[str, str]:
        """Normalize parameters to WDK string values.

        Every value is passed through ``normalize_param_value`` and then
        filtered as follows:

        * ``None`` values are omitted (never explicitly set).
        * Values that normalize to a non-blank string are kept as-is.
        * Values that normalize to a blank string are emitted as ``""``
          when the raw value was itself a ``str`` (the caller explicitly
          included it, and WDK may accept it via ``allowEmptyValue``) or
          when the name is listed in ``keep_empty``.
        * Any other value that normalizes to a blank string is omitted.
          WDK rejects params like ``hard_floor`` with value ``""``
          (Cannot be empty), so dropping them avoids 422s; the caller
          should supply a valid value for required params.

        A ``profile_pattern`` entry, if present in the result, has its
        entries sorted alphabetically.

        :param parameters: Raw parameter dict (a falsy value is treated as empty).
        :param keep_empty: Param names that must be kept even when empty
            (e.g. AnswerParams that WDK requires as ``""``).
        :returns: Mapping of param name to normalized string value.
        """
        keep = keep_empty or set()
        out: dict[str, str] = {}
        for key, value in (parameters or {}).items():
            if value is None:
                continue
            s = normalize_param_value(value)
            has_text = bool(s.strip())
            if has_text or key in keep or isinstance(value, str):
                # Whitespace-only values collapse to the empty string.
                out[key] = s if has_text else ""
        # OrthoMCL requires profile_pattern entries in alphabetical order.
        # The frontend monorepo always .sort()s before joining — we must too.
        if "profile_pattern" in out:
            out["profile_pattern"] = _sort_profile_pattern(out["profile_pattern"])
        return out

    async def _ensure_session(self) -> None:
        """Initialize session and resolve user id for mutation endpoints.

        Some WDK deployments allow GET/POST using `/users/current/...` but
        do NOT allow PUT/PATCH/DELETE on `/users/current/...` (405 Method
        Not Allowed). Resolve the concrete user id once and then use
        `/users/{userId}/...`.

        If resolution yields a falsy value, ``user_id`` is left unchanged;
        the session is still marked initialized so it is not retried.
        """
        if self._session_initialized:
            return
        if self.user_id == CURRENT_USER:
            resolved = await resolve_wdk_user_id(self.client)
            if resolved:
                logger.info("Resolved WDK user id", resolved_user_id=resolved)
                self.user_id = resolved
        self._session_initialized = True

    async def _expand_tree_params_to_leaves(
        self,
        record_type: str,
        search_name: str,
        params: dict[str, str],
    ) -> dict[str, str]:
        """Expand parent tree nodes to leaf descendants for multi-pick-vocabulary params.

        WDK tree params with ``countOnlyLeaves=true`` (like organism)
        silently return 0 results when given a parent node. The WDK
        frontend's CheckboxTree auto-selects all leaf descendants when a
        parent is clicked. We replicate that: fetch the search's param
        specs, find tree params with ``countOnlyLeaves``, and expand any
        parent values to their leaves.

        This is best-effort: on any unexpected response shape or error the
        original ``params`` mapping is returned unchanged.

        :param record_type: WDK record type used in the search URL.
        :param search_name: WDK search name used in the search URL.
        :param params: Normalized parameter mapping (not mutated).
        :returns: A copy of ``params`` with expanded values, or ``params``
            itself when expansion is not possible.
        """
        import json as _json

        from veupath_chatbot.domain.parameters.vocab_utils import (
            collect_leaf_terms,
            find_vocab_node,
        )

        try:
            search_def = await self.client.get(
                f"/record-types/{record_type}/searches/{search_name}",
                params={"expandParams": "true"},
            )
            if not isinstance(search_def, dict):
                return params
            # Unwrap the ``searchData`` wrapper if present.
            search_data = search_def.get("searchData", search_def)
            if not isinstance(search_data, dict):
                return params
            raw_specs = search_data.get("parameters", [])
            if not isinstance(raw_specs, list):
                return params

            result = dict(params)
            for spec in raw_specs:
                if not isinstance(spec, dict):
                    continue
                name = str(spec.get("name", ""))
                if name not in result:
                    continue
                # Only expand multi-pick tree vocabs with countOnlyLeaves
                if spec.get("type") != "multi-pick-vocabulary":
                    continue
                if not spec.get("countOnlyLeaves"):
                    continue
                vocab = spec.get("vocabulary")
                if not isinstance(vocab, dict):
                    continue

                # Parse current value; a non-JSON string is treated as a
                # single selected term.
                raw = result[name]
                try:
                    values = _json.loads(raw) if isinstance(raw, str) else raw
                except _json.JSONDecodeError:
                    values = [raw] if raw else []
                if not isinstance(values, list):
                    continue

                # Expand each value: if it's a parent node, replace with
                # leaves. ``seen`` keeps the output free of duplicates
                # while preserving first-seen order.
                expanded: list[str] = []
                seen: set[str] = set()
                for val in values:
                    val_str = str(val)
                    node = find_vocab_node(vocab, val_str)
                    if node is None:
                        # Unknown value — pass through
                        if val_str not in seen:
                            expanded.append(val_str)
                            seen.add(val_str)
                        continue
                    leaves = collect_leaf_terms(node)
                    if not leaves:
                        # Already a leaf or empty
                        if val_str not in seen:
                            expanded.append(val_str)
                            seen.add(val_str)
                    else:
                        for leaf in leaves:
                            if leaf not in seen:
                                expanded.append(leaf)
                                seen.add(leaf)

                # Only rewrite the param when expansion changed something,
                # so untouched values keep their original encoding.
                if expanded != [str(v) for v in values]:
                    logger.info(
                        "Expanded tree param to leaves",
                        param=name,
                        search=search_name,
                        original_count=len(values),
                        expanded_count=len(expanded),
                    )
                    result[name] = _json.dumps(expanded)
            return result
        except Exception:
            # Deliberately non-fatal, but keep the traceback so the
            # swallowed failure can be diagnosed.
            logger.debug("Failed to expand tree params (non-fatal)", exc_info=True)
            return params

    async def _expand_profile_pattern_groups(
        self,
        record_type: str,
        pattern: str,
    ) -> str:
        """Expand group codes in a profile_pattern to leaf species codes.

        The WDK ``profile_pattern`` is matched via SQL LIKE against a
        stored profile string that only contains **leaf** species codes.
        Group codes (e.g. ``MAMM``) never appear in the DB string and
        silently return 0.

        The WDK frontend expands group → leaves automatically via the
        ``phyletic_indent_map`` tree. We replicate that logic here so the
        LLM can use intuitive group codes like ``MAMM:N``.

        Duplicate entries produced by the expansion (for example a group
        plus one of its own leaves) are collapsed, because a repeated
        entry in a LIKE pattern would require the code to occur twice.
        If every entry is dropped the result is ``"%%"``.

        This is best-effort: on any unexpected response shape or error the
        original ``pattern`` is returned unchanged.

        :param record_type: WDK record type used in the search URL.
        :param pattern: Pattern of the form ``%CODE:STATE[:QUANTIFIER]%...%``.
        :returns: Expanded, de-duplicated and sorted pattern, or ``pattern``
            itself when expansion is not possible.
        """
        if not pattern.startswith("%") or not pattern.endswith("%"):
            return pattern

        # Parse entries: ["MAMM:N", "pfal:Y", ...]
        entries = [p for p in pattern.strip("%").split("%") if p]
        if not entries:
            return pattern

        # Fetch the phyletic tree to identify group vs leaf codes.
        try:
            search_def = await self.client.get(
                f"/record-types/{record_type}/searches/GenesByOrthologPattern",
                params={"expandParams": "true"},
            )
            if not isinstance(search_def, dict):
                return pattern
            # Unwrap searchData wrapper if present.
            search_data = search_def.get("searchData", search_def)
            if not isinstance(search_data, dict):
                return pattern
            params = search_data.get("parameters", [])
            if not isinstance(params, list):
                return pattern

            indent_vocab: list[JSONValue] = []
            for spec in params:
                if isinstance(spec, dict) and spec.get("name") == "phyletic_indent_map":
                    vocab = spec.get("vocabulary")
                    if isinstance(vocab, list):
                        indent_vocab = vocab
                    break
            if not indent_vocab:
                return pattern

            # Build parent→children map from the indentation tree.
            # Each entry is [code, depth, null].
            codes_at_depth: list[tuple[str, int]] = []
            for item in indent_vocab:
                if isinstance(item, list) and len(item) >= 2:
                    codes_at_depth.append(
                        (str(item[0]), int(str(item[1])) if item[1] is not None else 0)
                    )

            # For each code, find its leaf descendants.
            children_of: dict[str, list[str]] = {}
            leaf_codes: set[str] = set()
            for i, (code, depth) in enumerate(codes_at_depth):
                # Collect all descendants until we hit same or lower depth.
                descendants: list[str] = []
                for j in range(i + 1, len(codes_at_depth)):
                    d_code, d_depth = codes_at_depth[j]
                    if d_depth <= depth:
                        break
                    descendants.append(d_code)
                if descendants:
                    children_of[code] = descendants
                else:
                    leaf_codes.add(code)

            # Expand group codes using CODE:STATE[:QUANTIFIER] encoding.
            #
            # Quantifier semantics for groups:
            #   :N:all → absent from ALL members → expand to leaf :N (default for :N)
            #   :N:any → absent from ANY member → cannot express in WDK, drop
            #   :Y:all → present in ALL members → expand to leaf :Y (rare, usually 0)
            #   :Y:any → present in ANY member → cannot express in WDK, drop (default for :Y)
            #
            # Leaf codes ignore the quantifier (single species).
            expanded: list[str] = []
            for entry in entries:
                parts = entry.split(":")
                if len(parts) < 2:
                    # No state component — pass through untouched.
                    expanded.append(entry)
                    continue
                code = parts[0]
                state = parts[1]  # Y or N
                quantifier = parts[2] if len(parts) >= 3 else None

                if code not in children_of:
                    # Leaf code — pass through (strip quantifier).
                    expanded.append(f"{code}:{state}")
                    continue

                # Group code — apply quantifier defaults.
                if quantifier is None:
                    quantifier = "all" if state == "N" else "any"

                if quantifier == "all":
                    # Expand to all leaf descendants (nested group codes
                    # among the descendants are skipped).
                    for desc in children_of[code]:
                        if desc in leaf_codes:
                            expanded.append(f"{desc}:{state}")
                else:
                    # "any" — cannot express in WDK profile_pattern (OR logic).
                    logger.info(
                        "Dropping group:%s:%s:any from profile_pattern "
                        "(cannot express 'any' in WDK)",
                        code,
                        state,
                    )

            # Collapse repeats while keeping first-seen order; the final
            # order is fixed by _sort_profile_pattern below.
            unique_entries = list(dict.fromkeys(expanded))
            return _sort_profile_pattern(f"%{'%'.join(unique_entries)}%")
        except Exception:
            # Deliberately non-fatal, but keep the traceback so the
            # swallowed failure can be diagnosed.
            logger.debug(
                "Failed to expand profile_pattern groups (non-fatal)",
                exc_info=True,
            )
            return pattern

    async def _standard_report(
        self,
        step_id: int,
        report_config: JSONObject,
    ) -> JSONObject:
        """Run a standard report on a step.

        Shared helper used by report, answer, count, and preview methods.

        :param step_id: WDK step ID (must be part of a strategy).
        :param report_config: Report configuration dict.
        :returns: Standard report response, or ``{}`` when the response
            is not a dict.
        """
        result = await self.client.post(
            f"/users/{self.user_id}/steps/{step_id}/reports/standard",
            json={"reportConfig": report_config},
        )
        return result if isinstance(result, dict) else {}