# Source code for veupath_chatbot.services.wdk.helpers

"""Shared WDK helpers for record parsing, attribute inspection, and param merging.

These functions are used by experiment results, gene set, and workbench
endpoints to work with WDK record types, primary keys, and analysis
parameters. Previously duplicated across multiple router modules.
"""

from veupath_chatbot.platform.types import JSONObject, JSONValue

# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------

# WDK attribute type names that ``is_sortable`` accepts; the incoming type is
# lower-cased before the membership test, so entries here must be lowercase.
_SORTABLE_WDK_TYPES = {"number", "float", "integer", "double"}

DETAIL_ATTRIBUTE_LIMIT = 50
"""Max attributes to request when fetching a single record detail.

WDK record types can have thousands of attributes (e.g. 3000+ expression
columns on transcript).  Requesting all would timeout.  The first ~50
``isInReport`` attributes cover core gene/record fields.
"""

# Substrings that ``is_suggested_score`` searches for inside a lower-cased
# attribute name; any single hit flags the attribute as a suggested score.
_SCORE_ATTRIBUTE_KEYWORDS = {
    "score",
    "e_value",
    "evalue",
    "bit_score",
    "bitscore",
    "p_value",
    "pvalue",
    "fold_change",
    "log_fc",
    "confidence",
}


# ---------------------------------------------------------------------------
# Attribute classification
# ---------------------------------------------------------------------------


[docs] def is_sortable(attr_type: str | None) -> bool: """Return ``True`` if a WDK attribute type supports numeric sorting.""" if not attr_type: return False return attr_type.lower() in _SORTABLE_WDK_TYPES
def is_suggested_score(name: str) -> bool:
    """Heuristically decide whether an attribute name looks like a ranking score.

    The check is a case-insensitive substring match: *name* is lower-cased and
    searched for each keyword in ``_SCORE_ATTRIBUTE_KEYWORDS``.

    :param name: Attribute name to inspect.
    :returns: ``True`` if any keyword occurs anywhere in the name.
    """
    folded = name.lower()
    for keyword in _SCORE_ATTRIBUTE_KEYWORDS:
        if keyword in folded:
            return True
    return False
# ---------------------------------------------------------------------------
# Primary key extraction
# ---------------------------------------------------------------------------
[docs] def extract_pk(record: JSONObject) -> str | None: """Extract primary key string from a WDK record. WDK records use ``"id": [{name, value}, ...]`` for the composite primary key. Returns the first part's value, stripped. """ pk = record.get("id") if isinstance(pk, list) and pk: first = pk[0] if isinstance(first, dict): val = first.get("value") if isinstance(val, str): return val.strip() return None
[docs] def extract_record_ids( records: object, *, preferred_key: str | None = None, ) -> list[str]: """Extract gene/record IDs from WDK standard report records. If *preferred_key* is given, looks it up in each record's ``attributes`` dict first; falls back to the primary-key array. Accepts ``object`` so callers do not need to narrow the type before calling (e.g. ``answer.get("records")`` may return ``None``). :param records: WDK answer records (expected ``list[dict]``). :param preferred_key: Attribute name to prefer over primary key. :returns: List of non-empty record IDs. """ if not isinstance(records, list): return [] ids: list[str] = [] for rec in records: if not isinstance(rec, dict): continue extracted: str | None = None if preferred_key: attrs = rec.get("attributes") if isinstance(attrs, dict): val = attrs.get(preferred_key) if isinstance(val, str) and val.strip(): extracted = val.strip() if extracted is None: extracted = extract_pk(rec) if extracted: ids.append(extracted) return ids
# ---------------------------------------------------------------------------
# Primary key ordering
# ---------------------------------------------------------------------------
def order_primary_key(
    pk_parts: list[JSONObject],
    pk_refs: list[str],
    pk_defaults: dict[str, str],
) -> list[JSONObject]:
    """Reorder and fill primary key parts to match the WDK record class definition.

    WDK requires PK columns in the exact order defined by
    ``primaryKeyColumnRefs``.  Step reports may omit columns like
    ``project_id`` and may return them in a different order.

    For every column in *pk_refs* the value is taken from *pk_parts* if it is
    non-empty, otherwise from *pk_defaults*, otherwise ``""``.  A part whose
    ``value`` is ``None`` (JSON ``null``) is treated as empty so the default
    applies, instead of being stringified to the literal ``"None"``.

    :param pk_parts: Client-provided PK parts (``[{name, value}, ...]``).
        Entries that are not dicts are ignored.
    :param pk_refs: Column names in record-class order.  Non-string entries
        are skipped.
    :param pk_defaults: Default values for missing columns (e.g. ``project_id``).
    :returns: Ordered PK parts matching ``pk_refs``.
    """
    supplied: dict[str, str] = {}
    for part in pk_parts:
        if not isinstance(part, dict):
            continue
        raw_value = part.get("value")
        # str(None) would give the truthy string "None" and defeat the
        # default fallback below, so map null to the empty string instead.
        supplied[str(part.get("name", ""))] = (
            "" if raw_value is None else str(raw_value)
        )

    ordered: list[JSONObject] = []
    for col in pk_refs:
        if not isinstance(col, str):
            continue
        resolved = supplied.get(col) or pk_defaults.get(col) or ""
        ordered.append({"name": col, "value": resolved})
    return ordered
# ---------------------------------------------------------------------------
# Attribute list building
# ---------------------------------------------------------------------------
def build_attribute_list(attrs_raw: object) -> list[JSONObject]:
    """Normalize the attributes of a WDK record type into a flat list.

    Two input shapes are understood:

    * a dict (``attributesMap``), where each key is the attribute name, and
    * a list (expanded format), where each element carries its own ``name``
      (an absent ``name`` becomes the empty string).

    Entries whose metadata is not a dict are dropped; any other input shape
    yields an empty list.  Each output entry has the keys ``name``,
    ``displayName``, ``help``, ``type``, ``isDisplayable``, ``isSortable``
    and ``isSuggested``.

    :param attrs_raw: Raw attributes value from the record type info.
    :returns: Normalized attribute list, in input order.
    """
    named_metas: list[tuple[str, JSONObject]]
    if isinstance(attrs_raw, dict):
        named_metas = [
            (str(key), meta)
            for key, meta in attrs_raw.items()
            if isinstance(meta, dict)
        ]
    elif isinstance(attrs_raw, list):
        named_metas = [
            (str(meta.get("name", "")), meta)
            for meta in attrs_raw
            if isinstance(meta, dict)
        ]
    else:
        named_metas = []

    return [
        _build_single_attribute(attr_name, meta, name_fallback=attr_name)
        for attr_name, meta in named_metas
    ]
def _build_single_attribute(
    name: str,
    meta: JSONObject,
    *,
    name_fallback: str,
) -> JSONObject:
    """Turn one WDK attribute's metadata into the normalized attribute dict.

    :param name: Attribute name to report and to test for score keywords.
    :param meta: Raw WDK metadata for the attribute.
    :param name_fallback: Display name used when *meta* has no ``displayName``.
    :returns: Dict with ``name``, ``displayName``, ``help``, ``type``,
        ``isDisplayable``, ``isSortable`` and ``isSuggested``.
    """
    declared_type = meta.get("type")
    # Non-string types are reported as None (and are therefore not sortable).
    type_name = str(declared_type) if isinstance(declared_type, str) else None
    numeric = is_sortable(type_name)

    normalized: JSONObject = {"name": name}
    normalized["displayName"] = meta.get("displayName", name_fallback)
    normalized["help"] = meta.get("help")
    normalized["type"] = type_name
    normalized["isDisplayable"] = meta.get("isDisplayable", True)
    normalized["isSortable"] = numeric
    # Only sortable attributes are ever suggested as ranking scores.
    normalized["isSuggested"] = is_suggested_score(name) if numeric else False
    return normalized


# ---------------------------------------------------------------------------
# Detail attribute extraction
# ---------------------------------------------------------------------------
def extract_detail_attributes(
    attrs_raw: object,
) -> tuple[list[str], dict[str, str]]:
    """Pick the attributes to request for the record detail view.

    An attribute is kept when its ``isInReport`` flag is truthy; when
    ``isInReport`` is absent, ``isDisplayable`` is used instead (default
    ``False``).  Collection stops once :data:`DETAIL_ATTRIBUTE_LIMIT` names
    have been gathered, so that record types with thousands of attributes
    don't timeout WDK.

    Handles both dict (``attributesMap``) and list (expanded) formats;
    entries whose metadata is not a dict are ignored.

    :param attrs_raw: Raw attributes value from the record type info.
    :returns: ``(attribute_names, display_name_map)``.  The display name falls
        back to the attribute name when ``displayName`` is not a string.
    """
    candidates: list[tuple[str, JSONObject]]
    if isinstance(attrs_raw, dict):
        candidates = [
            (str(key), meta)
            for key, meta in attrs_raw.items()
            if isinstance(meta, dict)
        ]
    elif isinstance(attrs_raw, list):
        candidates = [
            (str(meta.get("name", "")), meta)
            for meta in attrs_raw
            if isinstance(meta, dict)
        ]
    else:
        candidates = []

    selected: list[str] = []
    labels: dict[str, str] = {}
    for attr_name, meta in candidates:
        in_report = meta.get("isInReport", meta.get("isDisplayable", False))
        if in_report:
            selected.append(attr_name)
            label = meta.get("displayName")
            labels[attr_name] = str(label) if isinstance(label, str) else attr_name
            if len(selected) >= DETAIL_ATTRIBUTE_LIMIT:
                break
    return selected, labels
# ---------------------------------------------------------------------------
# Analysis parameter merging
# ---------------------------------------------------------------------------
def merge_analysis_params(
    form_meta: JSONValue,
    user_params: JSONObject,
) -> JSONObject:
    """Combine WDK form defaults with user-supplied analysis parameters.

    Defaults are always extracted from the WDK form metadata and the
    user-supplied parameters are layered on top, so required fields are never
    missing (which would cause WDK 422 errors).

    The merged result is then passed through ``encode_vocab_params`` so that
    vocabulary params (``single-pick-vocabulary``, ``multi-pick-vocabulary``)
    are re-encoded as JSON arrays from the form metadata.  This keeps
    user-supplied plain strings from bypassing the encoding required by
    ``AbstractEnumParam.convertToTerms()``.

    :param form_meta: WDK analysis form metadata.
    :param user_params: Parameters supplied by the caller; these win over
        defaults on key collisions.
    :returns: The merged, vocabulary-encoded parameter dict.
    """
    # NOTE(review): imported at call time rather than at module top —
    # presumably to avoid an import cycle; confirm before hoisting.
    from veupath_chatbot.services.experiment.enrichment import (
        _extract_default_params,
        encode_vocab_params,
    )

    combined: JSONObject = {**_extract_default_params(form_meta), **user_params}
    return encode_vocab_params(combined, form_meta)