"""Validation of search parameter values."""
from collections.abc import Awaitable, Callable
from typing import cast
from veupath_chatbot.domain.parameters.canonicalize import ParameterCanonicalizer
from veupath_chatbot.domain.parameters.normalize import ParameterNormalizer
from veupath_chatbot.domain.parameters.specs import (
adapt_param_specs,
extract_param_specs,
find_missing_required_params,
unwrap_search_data,
)
from veupath_chatbot.integrations.veupathdb.client import (
encode_context_param_values_for_wdk,
)
from veupath_chatbot.integrations.veupathdb.discovery import get_discovery_service
from veupath_chatbot.integrations.veupathdb.factory import get_wdk_client
from veupath_chatbot.platform.errors import ValidationError
from veupath_chatbot.platform.logging import get_logger
from veupath_chatbot.platform.types import JSONObject, JSONValue
from .param_resolution import (
_extract_param_names,
_filter_context_values,
expand_search_details_with_params,
)
logger = get_logger(__name__)
[docs]
def _validation_payload(
    *,
    is_valid: bool,
    normalized: JSONObject,
    general: list[str],
    by_key: dict[str, list[str]],
) -> JSONObject:
    """Build the stable ``{"validation": {...}}`` payload returned to the UI."""
    return {
        "validation": {
            "isValid": is_valid,
            "normalizedContextValues": normalized,
            "errors": {
                # Quoted cast targets: the casts only exist for the type checker.
                "general": cast("JSONValue", general),
                "byKey": cast("JSONValue", by_key),
            },
        }
    }


async def validate_search_params(
    *,
    site_id: str,
    record_type: str,
    search_name: str,
    context_values: JSONObject | None,
) -> JSONObject:
    """Validate and canonicalize search parameters for UI consumption.

    Returns a stable payload::

        {
            "validation": {
                "isValid": bool,
                "normalizedContextValues": {...},
                "errors": {"general": [...], "byKey": {...}},
            }
        }

    The goal is to keep the frontend a consumer of backend normalization +
    validation, without requiring the UI to interpret raw WDK payloads.

    Args:
        site_id: Site identifier forwarded to the search-details lookup.
        record_type: Record type forwarded to the search-details lookup.
        search_name: Search name forwarded to the search-details lookup.
        context_values: Raw parameter values; ``None`` is treated as empty.

    Returns:
        The validation payload. A failure to load search metadata, a
        ``ValidationError`` raised during canonicalization, and missing
        required parameters are all reported inside the payload
        (``isValid`` is ``False``) instead of being raised.
    """
    raw_context = context_values or {}

    try:
        details = await expand_search_details_with_params(
            site_id=site_id,
            record_type=record_type,
            search_name=search_name,
            context_values=raw_context,
        )
        allowed = _extract_param_names(details if isinstance(details, dict) else {})
    except Exception as exc:
        # Boundary handler: the UI always receives a payload, never a raw error.
        return _validation_payload(
            is_valid=False,
            normalized={},
            general=[f"Failed to load search metadata: {exc}"],
            by_key={},
        )

    # Only values for parameters named by the search metadata are validated.
    filtered_context = _filter_context_values(raw_context, allowed)
    spec_payload = unwrap_search_data(details) or {}
    spec_map = adapt_param_specs(spec_payload)
    raw_specs = extract_param_specs(spec_payload)

    try:
        canonicalizer = ParameterCanonicalizer(spec_map)
        normalized_context = canonicalizer.canonicalize(filtered_context)
    except ValidationError as exc:
        # One stringified fallback shared by every entry lacking its own message,
        # so both error collections only ever hold strings.
        fallback = str(exc.detail or exc.title)
        by_key: dict[str, list[str]] = {}
        general: list[str] = []
        for err in exc.errors or []:
            if not isinstance(err, dict):
                continue
            param = err.get("param") or err.get("path")
            message = err.get("message") or err.get("detail")
            text = message if isinstance(message, str) else fallback
            if isinstance(param, str):
                by_key.setdefault(param, []).append(text)
            else:
                general.append(text)
        if not general:
            general = [fallback]
        return _validation_payload(
            is_valid=False,
            normalized={},
            general=general,
            by_key=by_key,
        )

    # Required checks using raw WDK specs (keeps semantics aligned with WDK).
    missing = find_missing_required_params(raw_specs, normalized_context)
    if missing:
        return _validation_payload(
            is_valid=False,
            normalized=normalized_context,
            general=[f"Missing required parameters: {', '.join(missing)}"],
            by_key={name: ["Required"] for name in missing},
        )

    return _validation_payload(
        is_valid=True,
        normalized=normalized_context,
        general=[],
        by_key={},
    )
[docs]
# Cap on how many names/examples are echoed back inside error contexts.
_MAX_LISTED = 50


def _first_str(entry: JSONObject, *keys: str) -> str | None:
    """Return the first non-empty string stored under *keys* in *entry*, else ``None``."""
    for key in keys:
        value = entry.get(key)
        if isinstance(value, str) and value:
            return value
    return None


def _names_search(entry: JSONObject, search_name: str) -> bool:
    """Return whether *entry*'s ``urlSegment`` or ``name`` equals *search_name*."""
    return search_name in (entry.get("urlSegment"), entry.get("name"))


def _vocab_examples(
    vocab: JSONValue,
    extract_vocab_options: Callable[[JSONObject], list[str]],
) -> tuple[list[str], bool]:
    """Return ``(examples, truncated)`` for a param spec's ``vocabulary`` value."""
    if isinstance(vocab, dict):
        extracted = extract_vocab_options(vocab)
        # NOTE(review): the extractor may already cap its output, so reaching the
        # cap is treated as "possibly truncated" — confirm against the extractor.
        return extracted[:_MAX_LISTED], len(extracted) >= _MAX_LISTED
    if isinstance(vocab, list):
        head = vocab[:_MAX_LISTED]
        if vocab and isinstance(vocab[0], list):
            # Row-style vocabulary: the first cell of each row is the value.
            examples = [str(row[0]) for row in head if isinstance(row, list) and row]
        else:
            examples = [str(item) for item in head]
        # Truncation is judged on the source list, not on the filtered examples.
        return examples, len(vocab) > _MAX_LISTED
    return [], False


async def validate_parameters(
    *,
    site_id: str,
    record_type: str,
    search_name: str,
    parameters: JSONObject,
    resolve_record_type_for_search: Callable[
        [str | None, str | None, bool, bool], Awaitable[str | None]
    ],
    find_record_type_hint: Callable[[str, str | None], Awaitable[str | None]],
    extract_vocab_options: Callable[[JSONObject], list[str]],
) -> None:
    """Validate parameters against WDK search specs.

    Normalizes *parameters* in-place and raises ``ValidationError`` when
    the search is unknown, extra/unknown parameters are provided, or
    required parameters are missing.

    Args:
        site_id: Site whose WDK client and discovery data are consulted.
        record_type: Record type requested by the caller.
        search_name: Search whose parameter specs are validated against.
        parameters: Parameter values; replaced in place by the normalized values.
        resolve_record_type_for_search: Awaitable resolver called as
            ``(record_type, search_name, True, True)``; a ``None`` result
            means the search could not be resolved.
        find_record_type_hint: Awaitable called as
            ``(search_name, record_type)`` to suggest a record type when
            resolution fails.
        extract_vocab_options: Turns a dict-shaped vocabulary into a list of
            option strings for the "missing required parameters" error.

    Raises:
        ValidationError: If the search cannot be resolved or its details
            cannot be loaded, if unknown parameters are supplied, or if
            required parameters are missing. Errors raised by the
            normalizer itself propagate unchanged.
    """
    discovery = get_discovery_service()
    resolved_record_type = await resolve_record_type_for_search(
        record_type, search_name, True, True
    )
    if resolved_record_type is None:
        record_type_hint = await find_record_type_hint(search_name, record_type)
        raise ValidationError(
            title=f"Unknown or invalid search: {search_name}",
            detail="Search name not found in any record type.",
            errors=[
                {
                    "context": {
                        "recordType": record_type,
                        "recordTypeHint": record_type_hint,
                    }
                }
            ],
        )

    try:
        # Many WDK param vocabularies are context-dependent. Prefer the POST variant
        # that accepts `contextParamValues`, so values like "maximum2" validate when
        # the chosen sample sets require them.
        wdk_client = get_wdk_client(site_id)
        context = encode_context_param_values_for_wdk(parameters)
        try:
            details = await wdk_client.get_search_details_with_params(
                resolved_record_type,
                search_name,
                context=context,
                expand_params=True,
            )
        except Exception as contextual_exc:
            # Fallback: non-contextual specs (may be incomplete for dependent params).
            logger.warning(
                "Contextual param fetch failed, falling back to non-contextual specs",
                record_type=resolved_record_type,
                search_name=search_name,
                error=str(contextual_exc),
            )
            details = await discovery.get_search_details(
                site_id, resolved_record_type, search_name, expand_params=True
            )
    except Exception as exc:
        # Both lookups below only enrich the error context, so each is
        # best-effort: a failure must not replace the ValidationError.
        available: list[str] = []
        try:
            for entry in await discovery.get_searches(site_id, resolved_record_type):
                if not isinstance(entry, dict):
                    continue
                label = _first_str(entry, "name", "urlSegment")
                if label is not None:
                    available.append(label)
        except Exception as list_exc:
            logger.warning(
                "Available search listing failed",
                record_type=resolved_record_type,
                error=str(list_exc),
            )

        hint_record_type: str | None = None
        try:
            for rt in await discovery.get_record_types(site_id):
                if not isinstance(rt, dict):
                    continue
                rt_name = _first_str(rt, "urlSegment", "name")
                if rt_name is None or rt_name == record_type:
                    continue
                rt_searches = await discovery.get_searches(site_id, rt_name)
                if any(
                    isinstance(s, dict) and _names_search(s, search_name)
                    for s in rt_searches
                ):
                    hint_record_type = rt_name
                    break
        except Exception as hint_exc:
            logger.warning(
                "Record type hint resolution failed",
                search_name=search_name,
                error=str(hint_exc),
            )
            hint_record_type = None

        raise ValidationError(
            title=f"Unknown or invalid search: {search_name}",
            detail=str(exc),
            errors=[
                {
                    "context": {
                        "recordType": resolved_record_type,
                        "availableSearches": cast(JSONValue, available),
                        "recordTypeHint": hint_record_type,
                    }
                }
            ],
        ) from exc

    spec_payload = unwrap_search_data(details) or {}
    param_specs = extract_param_specs(spec_payload)
    param_spec_map = adapt_param_specs(spec_payload)

    # Snapshot before clearing: if the normalizer ever hands back the very
    # dict it was given, clear() would otherwise wipe the normalized values.
    normalized = dict(ParameterNormalizer(param_spec_map).normalize(parameters))
    parameters.clear()
    parameters.update(normalized)

    param_names: set[str] = set()
    for spec in param_specs:
        if not isinstance(spec, dict):
            continue
        spec_name = spec.get("name")
        if isinstance(spec_name, str):
            param_names.add(spec_name)

    extra_params = [key for key in parameters if key not in param_names]
    if extra_params:
        raise ValidationError(
            title="Unknown parameters provided",
            errors=[
                {
                    "context": {
                        "recordType": resolved_record_type,
                        "searchName": search_name,
                        "unknown": cast(JSONValue, extra_params),
                        "known": cast(
                            JSONValue, sorted(param_names)[:_MAX_LISTED]
                        ),
                        "truncated": len(param_names) > _MAX_LISTED,
                    }
                }
            ],
        )

    missing = find_missing_required_params(param_specs, parameters)
    if missing:
        # Offer example values for each missing parameter that has a vocabulary.
        options: JSONObject = {}
        for spec in param_specs:
            if not isinstance(spec, dict):
                continue
            name = _first_str(spec, "name")
            if name is None or name not in missing:
                continue
            examples, truncated = _vocab_examples(
                spec.get("vocabulary"), extract_vocab_options
            )
            if examples:
                options[name] = cast(
                    JSONValue,
                    {
                        "examples": cast(JSONValue, examples),
                        "truncated": truncated,
                    },
                )
        raise ValidationError(
            title="Missing required parameters",
            errors=[
                {
                    "context": {
                        "recordType": resolved_record_type,
                        "searchName": search_name,
                        "missing": cast(JSONValue, missing),
                        "options": options,
                    }
                }
            ],
        )