"""Gene search and resolve endpoints."""
from fastapi import APIRouter
from pydantic import BaseModel
# Router shared by every endpoint in this module: each route path declared
# below is served under the "/api/v1/sites" prefix and tagged "sites".
router = APIRouter(prefix="/api/v1/sites", tags=["sites"])
[docs]
class GeneSearchResultResponse(BaseModel):
    """A single gene result from site-search."""

    # The only required field; everything below has a default.
    geneId: str

    # Descriptive text fields, each defaulting to the empty string.
    # search_genes fills them from the lookup record key of the same name.
    displayName: str = ""
    organism: str = ""
    product: str = ""
    geneName: str = ""
    geneType: str = ""
    location: str = ""

    # search_genes keeps only the string entries of the record's
    # "matchedFields" list; defaults to an empty list.
    matchedFields: list[str] = []
[docs]
class GeneSearchResponse(BaseModel):
    """Paginated gene search response."""

    # Required: the gene entries built for this response.
    results: list[GeneSearchResultResponse]

    # Required: search_genes passes through the lookup's "totalCount" when it
    # is an int and otherwise falls back to the number of results returned.
    totalCount: int

    # Optional: organism names taken from the lookup's "suggestedOrganisms"
    # entry; defaults to an empty list.
    suggestedOrganisms: list[str] = []
[docs]
class GeneResolveRequest(BaseModel):
    """Request body for gene ID resolution."""

    # Required: the IDs to resolve. resolve_genes forwards this list unchanged
    # to the resolver and reports the entries it could not match.
    geneIds: list[str]
[docs]
class ResolvedGeneResponse(BaseModel):
    """A resolved gene record."""

    # The only required field; resolve_genes skips records without one.
    geneId: str

    # resolve_genes derives this from the record's "product" value rather
    # than from a dedicated display-name key.
    displayName: str = ""

    # Remaining text fields default to the empty string and are filled from
    # the resolver record key of the same name.
    organism: str = ""
    product: str = ""
    geneName: str = ""
    geneType: str = ""
    location: str = ""
[docs]
class GeneResolveResponse(BaseModel):
    """Gene ID resolution response."""

    # Required: one entry per resolver record that carried a usable gene ID.
    resolved: list[ResolvedGeneResponse]

    # Required: the requested IDs that do not appear among the resolved
    # records' gene IDs, in request order.
    unresolved: list[str]
[docs]
class OrganismsResponse(BaseModel):
    """Available organisms for a site."""

    # Required: organism names; list_organisms returns them sorted.
    organisms: list[str]
[docs]
@router.get("/{siteId}/organisms", response_model=OrganismsResponse)
async def list_organisms(siteId: str) -> OrganismsResponse:
    """Return all available organism names for a site via site-search."""
    # Imported at call time rather than at module import.
    from veupath_chatbot.services.wdk import query_site_search

    # Wildcard gene query capped at a single document: only the
    # "organismCounts" entry of the reply is read below.
    payload = await query_site_search(
        siteId,
        search_text="*",
        document_type="gene",
        limit=1,
    )

    # Anything other than a dict reply, or a dict without a dict-valued
    # "organismCounts", yields an empty organism list.
    counts = payload.get("organismCounts") if isinstance(payload, dict) else None
    if not isinstance(counts, dict):
        return OrganismsResponse(organisms=[])

    # The keys of the counts mapping are the organism names.
    return OrganismsResponse(organisms=sorted(counts))
[docs]
def _search_record_text(record: dict[str, object], key: str) -> str:
    """Return ``record[key]`` as text; a missing or ``None`` value becomes ``""``."""
    # dict.get's default only covers *missing* keys, so a key present with a
    # None value must be handled explicitly or str() would emit "None".
    value = record.get(key)
    return "" if value is None else str(value)


@router.get("/{siteId}/genes/search", response_model=GeneSearchResponse)
async def search_genes(
    siteId: str,
    q: str = "",
    organism: str | None = None,
    limit: int = 50,
    offset: int = 0,
) -> GeneSearchResponse:
    """Search genes by text using multi-strategy gene lookup."""
    # Parameters:
    #   siteId   - path parameter, forwarded to the lookup service.
    #   q        - search text (defaults to the empty string).
    #   organism - optional organism filter; an empty string is treated the
    #              same as None.
    #   limit / offset - paging values, forwarded to the lookup unchanged.
    # Returns a GeneSearchResponse built defensively from the lookup reply:
    # entries of an unexpected type are dropped rather than raising.
    from veupath_chatbot.services.gene_lookup import lookup_genes_by_text

    data = await lookup_genes_by_text(
        siteId, q, organism=organism or None, limit=limit, offset=offset
    )
    data_dict = data if isinstance(data, dict) else {}

    raw_results = data_dict.get("records")
    if not isinstance(raw_results, list):
        raw_results = []

    total = data_dict.get("totalCount", 0)

    # None entries are skipped so they are not rendered as the text "None".
    suggested_raw = data_dict.get("suggestedOrganisms")
    suggested_list = suggested_raw if isinstance(suggested_raw, list) else []
    suggested: list[str] = [str(s) for s in suggested_list if s is not None]

    results: list[GeneSearchResultResponse] = []
    for r in raw_results:
        if not isinstance(r, dict):
            continue
        raw_matched = r.get("matchedFields")
        matched_list = raw_matched if isinstance(raw_matched, list) else []
        results.append(
            GeneSearchResultResponse(
                geneId=_search_record_text(r, "geneId"),
                displayName=_search_record_text(r, "displayName"),
                organism=_search_record_text(r, "organism"),
                product=_search_record_text(r, "product"),
                geneName=_search_record_text(r, "geneName"),
                geneType=_search_record_text(r, "geneType"),
                location=_search_record_text(r, "location"),
                # Only string entries are kept; other types are ignored.
                matchedFields=[x for x in matched_list if isinstance(x, str)],
            )
        )

    return GeneSearchResponse(
        results=results,
        # Fall back to the page size when the lookup gives no integer total.
        totalCount=total if isinstance(total, int) else len(results),
        suggestedOrganisms=suggested,
    )
[docs]
def _resolved_record_text(record: dict[str, object], key: str) -> str:
    """Return ``record[key]`` as text; a missing or ``None`` value becomes ``""``."""
    # dict.get's default only covers *missing* keys, so a key present with a
    # None value must be handled explicitly or str() would emit "None".
    value = record.get(key)
    return "" if value is None else str(value)


@router.post("/{siteId}/genes/resolve", response_model=GeneResolveResponse)
async def resolve_genes(
    siteId: str,
    payload: GeneResolveRequest,
) -> GeneResolveResponse:
    """Resolve gene IDs to full records via WDK standard reporter."""
    # Parameters:
    #   siteId  - path parameter, forwarded to the resolver service.
    #   payload - request body carrying the gene IDs to resolve.
    # Returns a GeneResolveResponse with one entry per usable resolver record
    # plus the requested IDs that were not matched, in request order.
    from veupath_chatbot.services.gene_lookup import resolve_gene_ids

    data = await resolve_gene_ids(siteId, payload.geneIds)
    raw_records = data.get("records") if isinstance(data, dict) else []
    if not isinstance(raw_records, list):
        raw_records = []

    resolved_ids: set[str] = set()
    resolved: list[ResolvedGeneResponse] = []
    for rec in raw_records:
        if not isinstance(rec, dict):
            continue
        # A record whose geneId is missing, None, or blank is skipped; a None
        # ID must not be reported as a gene literally named "None".
        gene_id = _resolved_record_text(rec, "geneId").strip()
        if not gene_id:
            continue
        resolved_ids.add(gene_id)
        product = _resolved_record_text(rec, "product")
        resolved.append(
            ResolvedGeneResponse(
                geneId=gene_id,
                # The display name comes from the product text and falls back
                # to the gene ID whenever no product text is available.
                displayName=product or gene_id,
                organism=_resolved_record_text(rec, "organism"),
                product=product,
                geneName=_resolved_record_text(rec, "geneName"),
                geneType=_resolved_record_text(rec, "geneType"),
                location=_resolved_record_text(rec, "location"),
            )
        )

    unresolved = [gid for gid in payload.geneIds if gid not in resolved_ids]
    return GeneResolveResponse(resolved=resolved, unresolved=unresolved)