Source code for veupath_chatbot.services.gene_sets.confidence
"""Per-gene composite confidence scoring.
Combines classification, ensemble frequency, and enrichment support
into a single ranked score. Pure computation — no I/O.
"""
from dataclasses import dataclass
[docs]
@dataclass(frozen=True, slots=True)
class GeneConfidenceScore:
"""Confidence breakdown for a single gene."""
gene_id: str
composite_score: float
classification_score: float
ensemble_score: float
enrichment_score: float
# Classification-score contribution for each classification label.
# ``compute_gene_confidence`` looks a gene's label up here to obtain its
# ``classification_score``; TP is rewarded, FP and FN are penalised, and
# TN is neutral.
_CLASSIFICATION_WEIGHTS: dict[str, float] = dict(
    TP=1.0,
    FP=-1.0,
    FN=-0.5,
    TN=0.0,
)
[docs]
def compute_gene_confidence(
    *,
    tp_ids: list[str],
    fp_ids: list[str],
    fn_ids: list[str],
    tn_ids: list[str],
    ensemble_scores: dict[str, float] | None = None,
    enrichment_gene_counts: dict[str, int] | None = None,
    max_enrichment_terms: int = 1,
) -> list[GeneConfidenceScore]:
    """Score every classified gene and rank the results.

    All arguments are keyword-only.

    Args:
        tp_ids: Gene IDs labelled "TP".
        fp_ids: Gene IDs labelled "FP".
        fn_ids: Gene IDs labelled "FN".
        tn_ids: Gene IDs labelled "TN".
        ensemble_scores: Optional per-gene ensemble score. Genes missing
            from the mapping (or a ``None``/empty mapping) score 0.0.
        enrichment_gene_counts: Optional per-gene enrichment term count.
            Genes missing from the mapping count as 0.
        max_enrichment_terms: Divisor used to normalise the enrichment
            count. Values below 1 are treated as 1.

    Returns:
        One ``GeneConfidenceScore`` per distinct gene ID, ordered from
        highest to lowest ``composite_score``. Genes with equal composite
        scores keep the order in which they were first encountered
        (TP list, then FP, FN, TN).
    """
    ensemble_by_gene = ensemble_scores if ensemble_scores else {}
    term_hits_by_gene = enrichment_gene_counts if enrichment_gene_counts else {}
    # Guard against a zero or negative divisor.
    term_cap = max(max_enrichment_terms, 1)

    # A gene appearing in more than one list keeps the label of the first
    # list it shows up in; dict insertion order records first-seen order.
    weight_by_gene: dict[str, float] = {}
    labelled_groups = (
        ("TP", tp_ids),
        ("FP", fp_ids),
        ("FN", fn_ids),
        ("TN", tn_ids),
    )
    for label, gene_ids in labelled_groups:
        for gene_id in gene_ids:
            if gene_id in weight_by_gene:
                continue
            weight_by_gene[gene_id] = _CLASSIFICATION_WEIGHTS[label]

    def _score(gene_id: str, classification: float) -> GeneConfidenceScore:
        # Build the score record for one gene from its three components.
        ensemble = ensemble_by_gene.get(gene_id, 0.0)
        # Enrichment is the term count over the cap, never above 1.0.
        enrichment = min(term_hits_by_gene.get(gene_id, 0) / term_cap, 1.0)
        return GeneConfidenceScore(
            gene_id=gene_id,
            # Unweighted mean of the three component scores.
            composite_score=(classification + ensemble + enrichment) / 3.0,
            classification_score=classification,
            ensemble_score=ensemble,
            enrichment_score=enrichment,
        )

    return sorted(
        (_score(gene_id, weight) for gene_id, weight in weight_by_gene.items()),
        key=lambda scored: scored.composite_score,
        reverse=True,
    )