Source code for veupath_chatbot.integrations.veupathdb.site_search

"""Integration for VEuPathDB "site search" service.

This is the same backend used by the web UI route `/app/search`.

Important: this service is hosted at the site origin root (e.g. https://plasmodb.org/site-search),
not under the WDK service prefix (e.g. https://plasmodb.org/plasmo/service).

The site-search endpoint is **GET-only** -- POST returns HTTP 500.  Parameters
are passed as query-string key-value pairs:

  ``searchText``           -- search query
  ``docType``              -- restrict to a document type (e.g. "gene", "search")
  ``offset`` / ``numRecords`` -- pagination
  ``restrictToProject``    -- site project id filter
  ``restrictSearchToOrganisms`` -- comma-separated organism names
"""

import re
from urllib.parse import urlparse

import httpx
from tenacity import (
    retry,
    retry_if_exception_type,
    stop_after_attempt,
    wait_exponential,
)

from veupath_chatbot.integrations.veupathdb.site_router import get_site_router
from veupath_chatbot.platform.logging import get_logger
from veupath_chatbot.platform.types import JSONObject

# Module-level logger named after this module.
logger = get_logger(__name__)

# Shared client for site-search requests (avoids connection-per-request overhead).
# Starts as None; _get_site_search_client() creates it on first use and
# close_site_search_client() closes it and resets this back to None.
_site_search_client: httpx.AsyncClient | None = None


def _get_site_search_client() -> httpx.AsyncClient:
    """Return the module-wide site-search HTTP client, building it on demand.

    The cached client is reused while it is open. When no client exists yet,
    or the cached one has been closed, a new ``httpx.AsyncClient`` is created,
    stored in the module global, and returned.
    """
    global _site_search_client
    cached = _site_search_client
    if cached is not None and not cached.is_closed:
        return cached

    # 30 s default for every timeout phase, with a longer 90 s read timeout.
    timeouts = httpx.Timeout(30.0, read=90.0)
    _site_search_client = httpx.AsyncClient(
        timeout=timeouts,
        follow_redirects=True,
        headers={"Accept": "application/json"},
    )
    return _site_search_client


async def close_site_search_client() -> None:
    """Close the shared site-search HTTP client (call during app shutdown).

    Safe to call when no client was ever created or when it is already
    closed; in both cases this only resets the module-level reference.
    """
    global _site_search_client
    # Detach the client from the module global *before* awaiting the close so
    # that the reference is cleared even if ``aclose()`` raises.
    client, _site_search_client = _site_search_client, None
    if client is not None and not client.is_closed:
        await client.aclose()


def strip_html_tags(value: str) -> str:
    """Return *value* with HTML/XML-style tags removed and whitespace trimmed.

    Anything of the form ``<...>`` or ``</...>`` is deleted; the text between
    tags is kept. A falsy *value* (``""`` or ``None``) yields ``""``.
    """
    # site-search highlights matches with <em> tags.
    return re.sub(r"</?[^>]+>", "", value or "").strip()