Source code for veupath_chatbot.services.research.clients.openalex

"""OpenAlex API client."""

from typing import cast

import httpx

from veupath_chatbot.platform.types import JSONObject, JSONValue
from veupath_chatbot.services.research.clients._base import (
    API_USER_AGENT,
    StandardClient,
    make_citation,
)
from veupath_chatbot.services.research.utils import truncate_text


class OpenAlexClient(StandardClient):
    """Client for the OpenAlex works search API.

    ``_fetch_raw`` performs the HTTP search and ``_parse_item`` converts one
    raw work record into a ``(result, citation)`` pair.
    """

    _source_name = "openalex"

    async def _fetch_raw(self, query: str, *, limit: int) -> list[JSONValue]:
        """Search OpenAlex works and return the raw ``results`` entries.

        :param query: Free-text search string, sent as the ``search`` parameter.
        :param limit: Page size, sent as the ``per-page`` parameter.
        :returns: The ``results`` list from the JSON payload, or an empty list
            when the payload is not an object or ``results`` is not a list.
        :raises httpx.HTTPStatusError: If the response status is 4xx/5xx
            (via ``raise_for_status``).
        """
        url = "https://api.openalex.org/works"
        params = {"search": query, "per-page": str(limit)}
        # NOTE(review): ``self._timeout`` is not set in this class; presumably
        # it is provided by ``StandardClient`` -- confirm.
        async with httpx.AsyncClient(
            timeout=self._timeout, headers={"User-Agent": API_USER_AGENT}
        ) as client:
            resp = await client.get(url, params=params, follow_redirects=True)
            resp.raise_for_status()
            payload = resp.json()

        results = payload.get("results") if isinstance(payload, dict) else None
        # Guard against ``"results": null`` (list(None) would raise) and
        # against non-list values that list() would silently turn into
        # dict keys or single characters.
        return list(results) if isinstance(results, list) else []

    def _parse_item(
        self, raw: JSONValue, *, abstract_max_chars: int
    ) -> tuple[JSONObject, JSONObject] | None:
        """Convert one raw OpenAlex work into a ``(result, citation)`` pair.

        :param raw: One entry of the ``results`` list returned by the API.
        :param abstract_max_chars: Limit passed to ``truncate_text`` for the
            reconstructed abstract.
        :returns: ``None`` when ``raw`` is not a dict; otherwise a tuple of the
            result object and the citation built by ``make_citation``.
        """
        if not isinstance(raw, dict):
            return None
        item = raw

        title = str(item.get("title") or "").strip()

        # Accept the year as an int or a digit-only string; anything else
        # (including negative numbers and booleans) becomes None.
        year_i = item.get("publication_year")
        year = (
            int(year_i)
            if isinstance(year_i, (int, str)) and str(year_i).isdigit()
            else None
        )

        # The DOI is stored bare; the resolver prefix is re-added for the URL.
        doi = item.get("doi")
        doi = str(doi).replace("https://doi.org/", "") if isinstance(doi, str) else None

        id_val = item.get("id")
        url_item: str | None = id_val if isinstance(id_val, str) else None

        # Journal name: prefer ``primary_location.source`` and fall back to
        # the legacy ``host_venue`` object so older payloads still work.
        journal: str | None = None
        primary = item.get("primary_location")
        source = primary.get("source") if isinstance(primary, dict) else None
        for venue in (source, item.get("host_venue")):
            if not isinstance(venue, dict):
                continue
            jname = venue.get("display_name")
            if isinstance(jname, str):
                journal = jname.strip()
                if journal:
                    break

        # ``authors`` stays None when ``authorships`` is missing or not a
        # list, and is a (possibly empty) list otherwise.
        authors: list[str] | None = None
        auths = item.get("authorships")
        if isinstance(auths, list):
            authors = []
            for a in auths:
                if not isinstance(a, dict):
                    continue
                au = a.get("author")
                if isinstance(au, dict) and au.get("display_name"):
                    authors.append(str(au.get("display_name")))

        # The abstract arrives as an inverted index (word -> positions).
        # Flatten it to (position, word) pairs and order by position to
        # rebuild the text.
        abstract: str | None = None
        inv = item.get("abstract_inverted_index")
        if isinstance(inv, dict):
            pairs: list[tuple[int, str]] = []
            for word, idxs in inv.items():
                if not isinstance(word, str) or not isinstance(idxs, list):
                    continue
                pairs.extend((i, word) for i in idxs if isinstance(i, int))
            if pairs:
                # Sort on position only (stable) so words sharing a position
                # keep their index order.
                pairs.sort(key=lambda x: x[0])
                abstract = " ".join(w for _, w in pairs)
                abstract = truncate_text(abstract, abstract_max_chars)

        # Prefer the DOI link; otherwise use the OpenAlex ``id`` value.
        result_url = f"https://doi.org/{doi}" if doi else url_item

        result: JSONObject = {
            "title": title,
            "year": year,
            "doi": doi,
            "url": result_url,
            "authors": cast(JSONValue, authors),
            "journalTitle": journal,
            "abstract": abstract,
            "snippet": abstract or journal,
        }
        citation = make_citation(
            source="openalex",
            id_prefix="openalex",
            title=title or (url_item or "OpenAlex result"),
            url=result_url,
            authors=authors,
            year=year,
            doi=doi,
            snippet=abstract or journal,
        )
        return result, citation