Source code for veupath_chatbot.platform.pydantic_validation
"""Utilities for parsing Pydantic validation error text.
Some tool frameworks (including Kani) surface tool-argument validation failures as
plain text (Pydantic's human-readable format) rather than structured JSON. This
module provides a best-effort parser so we can return consistent, machine-readable
error payloads to the client.
"""
import re
from typing import cast
from veupath_chatbot.platform.types import JSONArray, JSONObject, JSONValue
_HEADER_RE = re.compile(
r"^\s*(?P<count>\d+)\s+validation error for\s+(?P<model>.+?)\s*$"
)
[docs]
def parse_pydantic_validation_error_text(text: str | None) -> JSONObject | None:
"""Parse Pydantic v2 ValidationError string into a structured payload.
Returns a dict with keys:
- model: string (best-effort)
- errorCount: int | None
- errors: list[dict] (best-effort)
- raw: original text
:param text: Pydantic error text (or None).
:returns: Parsed validation summary or None.
"""
if not text:
return None
if "validation error for" not in text:
return None
lines = [ln.rstrip("\n") for ln in str(text).splitlines()]
header = next((ln for ln in lines if ln.strip()), "").strip()
m = _HEADER_RE.match(header)
if not m:
return None
model = (m.group("model") or "").strip() or None
try:
error_count: int | None = int(m.group("count"))
except Exception:
error_count = None
errors: JSONArray = []
current_loc: str | None = None
def _parse_meta(meta_str: str) -> dict[str, str]:
meta: dict[str, str] = {}
for part in [p.strip() for p in meta_str.split(",")]:
if not part or "=" not in part:
continue
key, value = part.split("=", 1)
meta[key.strip()] = value.strip()
return meta
for ln in lines[1:]:
if not ln.strip():
continue
# "loc" lines are unindented; details are indented.
if ln.startswith(" "):
if current_loc is None:
continue
detail = ln.strip()
msg = detail
meta: dict[str, str] = {}
if "[" in detail and detail.endswith("]"):
msg_part, meta_part = detail.split("[", 1)
msg = msg_part.strip() or detail
meta = _parse_meta(meta_part[:-1])
err: JSONObject = {
"loc": [current_loc],
"msg": msg,
}
if meta.get("type"):
type_val = meta.get("type")
if isinstance(type_val, str):
err["type"] = type_val
if meta:
# dict[str, str] is a valid JSONValue (it's a dict[str, JSONValue])
err["meta"] = cast(JSONValue, meta)
errors.append(err)
else:
current_loc = ln.strip()
return {
"model": model,
"errorCount": error_count,
"errors": errors,
"raw": text,
}