Source code for veupath_chatbot.services.research.clients.europepmc

"""Europe PMC API client."""

from typing import cast

import httpx

from veupath_chatbot.platform.types import JSONObject, JSONValue
from veupath_chatbot.services.research.clients._base import (
    API_USER_AGENT,
    StandardClient,
    make_citation,
)
from veupath_chatbot.services.research.utils import truncate_text


class EuropePmcClient(StandardClient):
    """Client for the Europe PMC REST search API.

    Provides the ``_fetch_raw`` and ``_parse_item`` hooks.
    NOTE(review): these are presumably orchestrated by ``StandardClient``
    (not visible here) -- confirm against the base class.
    """

    _source_name = "europepmc"

    async def _fetch_raw(self, query: str, *, limit: int) -> list[JSONValue]:
        """Query the Europe PMC search endpoint and return the raw hit list.

        :param query: Search expression sent as the ``query`` parameter.
        :param limit: Page size requested from the service (``pageSize``).
        :returns: The ``resultList.result`` array of the JSON response, or an
            empty list when the response does not have that shape.
        :raises httpx.HTTPStatusError: If the service answers with an error
            status (via ``raise_for_status``).
        """
        url = "https://www.ebi.ac.uk/europepmc/webservices/rest/search"
        params = {
            "query": query,
            "format": "json",
            "pageSize": str(limit),
            "resultType": "core",
        }
        async with httpx.AsyncClient(
            timeout=self._timeout, headers={"User-Agent": API_USER_AGENT}
        ) as client:
            resp = await client.get(url, params=params, follow_redirects=True)
            resp.raise_for_status()
            payload = resp.json()

        # Validate each level explicitly: ``dict.get(key, default)`` only
        # applies the default when the key is absent, so a JSON ``null`` (or
        # any non-object) at ``resultList``/``result`` would otherwise raise.
        if not isinstance(payload, dict):
            return []
        result_list = payload.get("resultList")
        if not isinstance(result_list, dict):
            return []
        hits = result_list.get("result")
        return list(hits) if isinstance(hits, list) else []

    @staticmethod
    def _parse_year(value: JSONValue) -> int | None:
        """Coerce a raw ``pubYear`` value to an ``int``, or ``None`` if unusable."""
        # ``bool`` is a subclass of ``int``; a JSON ``true`` is not a year.
        if isinstance(value, bool):
            return None
        if isinstance(value, int):
            return value
        if isinstance(value, str) and value.isdigit():
            try:
                return int(value)
            except ValueError:
                # ``str.isdigit`` accepts characters such as superscript
                # digits that ``int`` cannot convert.
                return None
        return None

    def _parse_item(
        self, raw: JSONValue, *, abstract_max_chars: int
    ) -> tuple[JSONObject, JSONObject] | None:
        """Convert one raw Europe PMC hit into a ``(result, citation)`` pair.

        :param raw: One element of the list returned by ``_fetch_raw``.
        :param abstract_max_chars: Limit forwarded to ``truncate_text`` for
            the abstract.
        :returns: ``None`` when ``raw`` is not a JSON object; otherwise the
            normalized result record and the citation built by
            ``make_citation``.
        """
        if not isinstance(raw, dict):
            return None
        item = raw

        title_raw = item.get("title")
        title = title_raw.strip() if isinstance(title_raw, str) else ""

        year_i = self._parse_year(item.get("pubYear"))

        doi_val = item.get("doi")
        doi: str | None = doi_val if isinstance(doi_val, str) else None

        pmid_val = item.get("pmid")
        pmid: str | None = pmid_val if isinstance(pmid_val, str) else None

        # ``authorString`` is split on commas; blank fragments are dropped.
        author_str = item.get("authorString")
        authors = (
            [a.strip() for a in author_str.split(",") if a.strip()]
            if isinstance(author_str, str)
            else None
        )

        journal_raw = item.get("journalTitle")
        journal = journal_raw.strip() if isinstance(journal_raw, str) else None

        # Prefer the DOI link; fall back to the PubMed page when only a PMID
        # is available.
        link: str | None = None
        if doi:
            link = f"https://doi.org/{doi}"
        elif pmid:
            link = f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/"

        abstract_raw = item.get("abstractText")
        abstract = truncate_text(
            abstract_raw if isinstance(abstract_raw, str) else None,
            abstract_max_chars,
        )

        result: JSONObject = {
            "title": title,
            "year": year_i,
            "doi": doi,
            "pmid": pmid,
            "url": link,
            "authors": cast(JSONValue, authors),
            "journalTitle": journal,
            "abstract": abstract,
            "snippet": journal,
        }
        citation = make_citation(
            source="europepmc",
            id_prefix="epmc",
            # An empty title falls back to the link, then to a fixed label.
            title=title or (link or "Europe PMC result"),
            url=link,
            authors=authors,
            year=year_i,
            doi=doi,
            pmid=pmid,
            snippet=abstract or journal,
        )
        return result, citation