Source code for veupath_chatbot.services.research.clients.semanticscholar

"""Semantic Scholar API client."""

from typing import cast

import httpx

from veupath_chatbot.platform.types import JSONObject, JSONValue
from veupath_chatbot.services.research.clients._base import (
    API_USER_AGENT,
    StandardClient,
    make_citation,
)
from veupath_chatbot.services.research.utils import truncate_text


class SemanticScholarClient(StandardClient):
    """Client for the Semantic Scholar Graph API paper search.

    ``_fetch_raw`` performs the HTTP search request and ``_parse_item``
    converts a single raw search hit into a ``(result, citation)`` pair.
    """

    _source_name = "semanticscholar"

    async def _fetch_raw(self, query: str, *, limit: int) -> list[JSONValue]:
        """Search for papers and return the raw entries of the ``data`` array.

        Args:
            query: Search string sent as the ``query`` parameter.
            limit: Maximum number of papers to request (sent as a string).

        Returns:
            The list stored under the payload's ``data`` key. An empty list
            is returned when the payload is not a JSON object, or when
            ``data`` is missing, ``null``, or not a list.

        Raises:
            httpx.HTTPStatusError: If the response has an error status.
            Transport and JSON decoding errors are not caught here.
        """
        url = "https://api.semanticscholar.org/graph/v1/paper/search"
        params = {
            "query": query,
            "limit": str(limit),
            "fields": "title,year,authors,url,abstract,journal,externalIds",
        }
        # NOTE(review): ``self._timeout`` is not set in this class; presumably
        # it is provided by ``StandardClient`` -- confirm.
        async with httpx.AsyncClient(
            timeout=self._timeout, headers={"User-Agent": API_USER_AGENT}
        ) as client:
            resp = await client.get(url, params=params, follow_redirects=True)
            resp.raise_for_status()
            payload = resp.json()

        if not isinstance(payload, dict):
            return []
        items = payload.get("data")
        # Only a real list is passed through: ``list(None)`` would raise
        # TypeError, and ``list()`` of a dict or string would silently yield
        # keys or characters instead of paper records.
        return list(items) if isinstance(items, list) else []

    def _parse_item(
        self, raw: JSONValue, *, abstract_max_chars: int
    ) -> tuple[JSONObject, JSONObject] | None:
        """Convert one raw search hit into a result record and a citation.

        Fields with an unexpected type are treated as absent rather than
        raising.

        Args:
            raw: One element of the list returned by ``_fetch_raw``.
            abstract_max_chars: Length limit forwarded to ``truncate_text``
                for the abstract.

        Returns:
            ``None`` when ``raw`` is not a dict; otherwise a tuple of the
            result dict and the value returned by ``make_citation``.
        """
        if not isinstance(raw, dict):
            return None
        item = raw

        title = str(item.get("title") or "").strip()

        year_raw = item.get("year")
        # ``bool`` is a subclass of ``int``; a JSON true/false must not be
        # accepted as a publication year.
        year: int | None = (
            year_raw
            if isinstance(year_raw, int) and not isinstance(year_raw, bool)
            else None
        )

        url_val = item.get("url")
        url_item: str | None = url_val if isinstance(url_val, str) else None

        # ``authors`` stays None (not an empty list) when the field is absent
        # or not a list; entries without a truthy ``name`` are skipped.
        authors: list[str] | None = None
        raw_authors = item.get("authors")
        if isinstance(raw_authors, list):
            authors = [
                str(a.get("name"))
                for a in raw_authors
                if isinstance(a, dict) and a.get("name")
            ]

        abstract_raw = item.get("abstract")
        abstract = truncate_text(
            abstract_raw if isinstance(abstract_raw, str) else None,
            abstract_max_chars,
        )

        journal: str | None = None
        journal_raw = item.get("journal")
        if isinstance(journal_raw, dict) and journal_raw.get("name"):
            journal = str(journal_raw.get("name"))

        doi: str | None = None
        pmid: str | None = None
        ext = item.get("externalIds")
        if isinstance(ext, dict):
            doi_val = ext.get("DOI")
            if isinstance(doi_val, str):
                doi = doi_val
            pmid_val = ext.get("PubMed")
            if isinstance(pmid_val, str):
                pmid = pmid_val

        # Prefer the URL supplied by the API; fall back to a DOI resolver
        # link when only a DOI is known.
        result_url = url_item or (f"https://doi.org/{doi}" if doi else None)
        # The abstract is the preferred snippet, with the journal name as the
        # fallback.
        snippet = abstract or journal

        result: JSONObject = {
            "title": title,
            "year": year,
            "doi": doi,
            "pmid": pmid,
            "url": result_url,
            "authors": cast(JSONValue, authors),
            "journalTitle": journal,
            "abstract": abstract,
            "snippet": snippet,
        }
        citation = make_citation(
            source="semanticscholar",
            id_prefix="s2",
            # A citation always gets a non-empty title, even when the paper
            # record itself has none.
            title=title or (url_item or "Semantic Scholar result"),
            url=result_url,
            authors=authors,
            year=year,
            doi=doi,
            pmid=pmid,
            snippet=snippet,
        )
        return result, citation