Source code for veupath_chatbot.transport.http.schemas.gene_sets

"""HTTP request/response schemas for gene sets."""

from typing import Literal

from pydantic import BaseModel, Field

from veupath_chatbot.platform.types import JSONObject
from veupath_chatbot.services.experiment.types.core import EnrichmentAnalysisType
from veupath_chatbot.services.gene_sets.types import GeneSetSource

# Closed set of operation names accepted wherever a gene-set set operation
# is named (request and response models in this module).
SetOperation = Literal[
    "intersect",
    "union",
    "minus",
]


class CreateGeneSetRequest(BaseModel):
    """Create a gene set from IDs, strategy, or upload."""

    # Input may use either the snake_case field names or the camelCase aliases.
    model_config = {"populate_by_name": True}

    # Required: a 1-200 character name, the site, and the gene ID list.
    name: str = Field(min_length=1, max_length=200)
    site_id: str = Field(alias="siteId")
    gene_ids: list[str] = Field(alias="geneIds")

    # Falls back to "paste" when the caller omits it.
    source: GeneSetSource = "paste"

    # Optional WDK strategy/step/search details; each defaults to None.
    wdk_strategy_id: int | None = Field(default=None, alias="wdkStrategyId")
    wdk_step_id: int | None = Field(default=None, alias="wdkStepId")
    search_name: str | None = Field(default=None, alias="searchName")
    record_type: str | None = Field(default=None, alias="recordType")
    parameters: dict[str, str] | None = None
class GeneSetResponse(BaseModel):
    """Gene set response DTO."""

    # Construction may use either the snake_case field names or the aliases.
    model_config = {"populate_by_name": True}

    # Required identity and content fields.
    id: str
    name: str
    site_id: str = Field(alias="siteId")
    gene_ids: list[str] = Field(alias="geneIds")
    source: GeneSetSource
    gene_count: int = Field(alias="geneCount")

    # Optional WDK strategy/step/search details; each defaults to None.
    wdk_strategy_id: int | None = Field(default=None, alias="wdkStrategyId")
    wdk_step_id: int | None = Field(default=None, alias="wdkStepId")
    search_name: str | None = Field(default=None, alias="searchName")
    record_type: str | None = Field(default=None, alias="recordType")
    parameters: dict[str, str] | None = None

    # Default to an empty list / None.
    # NOTE(review): presumably only populated for sets derived from a set
    # operation -- confirm against the service layer.
    parent_set_ids: list[str] = Field(default_factory=list, alias="parentSetIds")
    operation: SetOperation | None = None

    # Timestamp is carried as a plain string; step_count defaults to 1.
    created_at: str = Field(alias="createdAt")
    step_count: int = Field(default=1, alias="stepCount")
class SetOperationRequest(BaseModel):
    """Perform set operations between two gene sets."""

    # Input may use either the snake_case field names or the camelCase aliases.
    model_config = {"populate_by_name": True}

    # IDs of the two operand gene sets.
    set_a_id: str = Field(alias="setAId")
    set_b_id: str = Field(alias="setBId")

    # One of the SetOperation literals.
    operation: SetOperation

    # Name for the request's result; must be 1-200 characters.
    name: str = Field(min_length=1, max_length=200)
class GeneSetEnrichRequest(BaseModel):
    """Run enrichment on a gene set."""

    # Input may use either the snake_case field name or the camelCase alias.
    model_config = {"populate_by_name": True}

    # Required list of enrichment analysis types (no length constraint here).
    enrichment_types: list[EnrichmentAnalysisType] = Field(
        alias="enrichmentTypes",
    )
class EnsembleScoringRequest(BaseModel):
    """Compute ensemble frequency scores across multiple gene sets."""

    # Input may use either the snake_case field names or the camelCase aliases.
    model_config = {"populate_by_name": True}

    # At least two gene set IDs are required.
    gene_set_ids: list[str] = Field(alias="geneSetIds", min_length=2)

    # Optional; defaults to None when omitted.
    positive_controls: list[str] | None = Field(
        default=None,
        alias="positiveControls",
    )
class ReverseSearchRequest(BaseModel):
    """Rank user's gene sets by recall of given positive genes."""

    # Input may use either the snake_case field names or the camelCase aliases.
    model_config = {"populate_by_name": True}

    # At least one positive gene ID is required.
    positive_gene_ids: list[str] = Field(alias="positiveGeneIds", min_length=1)

    # Optional; defaults to None when omitted.
    negative_gene_ids: list[str] | None = Field(
        default=None,
        alias="negativeGeneIds",
    )

    site_id: str = Field(alias="siteId")
class ReverseSearchResultItem(BaseModel):
    """A single ranked gene set in reverse search results."""

    # Construction may use either the snake_case field names or the aliases.
    model_config = {"populate_by_name": True}

    # Which gene set this row describes.
    gene_set_id: str = Field(alias="geneSetId")
    name: str
    search_name: str | None = Field(default=None, alias="searchName")

    # Ranking metrics, all required floats.
    recall: float
    precision: float
    f1: float

    # Required integer counts.
    result_count: int = Field(alias="resultCount")
    overlap_count: int = Field(alias="overlapCount")
class RunGeneSetAnalysisRequest(BaseModel):
    """Run a WDK step analysis on a gene set."""

    # Input may use either the snake_case field name or the camelCase alias.
    model_config = {"populate_by_name": True}

    # Must be a non-empty string.
    analysis_name: str = Field(alias="analysisName", min_length=1)

    # Defaults to a fresh empty dict per instance.
    parameters: JSONObject = Field(default_factory=dict)
class GeneConfidenceRequest(BaseModel):
    """Compute per-gene confidence scores from classification data."""

    # Input may use either the snake_case field names or the camelCase aliases.
    model_config = {"populate_by_name": True}

    # Four required ID lists.
    # NOTE(review): presumably true/false positives and negatives -- confirm
    # against the scoring service.
    tp_ids: list[str] = Field(alias="tpIds")
    fp_ids: list[str] = Field(alias="fpIds")
    fn_ids: list[str] = Field(alias="fnIds")
    tn_ids: list[str] = Field(alias="tnIds")

    # Optional string-keyed maps; both default to None.
    ensemble_scores: dict[str, float] | None = Field(
        default=None,
        alias="ensembleScores",
    )
    enrichment_gene_counts: dict[str, int] | None = Field(
        default=None,
        alias="enrichmentGeneCounts",
    )

    # Must be >= 1; defaults to 1.
    max_enrichment_terms: int = Field(
        default=1,
        alias="maxEnrichmentTerms",
        ge=1,
    )
class GeneConfidenceScoreResponse(BaseModel):
    """Single gene confidence score in the response."""

    # Construction may use either the snake_case field names or the aliases.
    model_config = {"populate_by_name": True}

    # Gene this score belongs to.
    gene_id: str = Field(alias="geneId")

    # All four scores are required floats.
    composite_score: float = Field(alias="compositeScore")
    classification_score: float = Field(alias="classificationScore")
    ensemble_score: float = Field(alias="ensembleScore")
    enrichment_score: float = Field(alias="enrichmentScore")