Source code for veupath_chatbot.services.experiment.custom_enrichment
"""Custom gene set enrichment analysis against experiment results."""
import math
from typing import TypedDict
from veupath_chatbot.services.experiment.stats import hypergeometric_log_sf
from veupath_chatbot.services.experiment.types import Experiment
[docs]
class CustomEnrichmentResult(TypedDict):
"""Return shape of :func:`run_custom_enrichment`."""
geneSetName: str
geneSetSize: int
overlapCount: int
overlapGenes: list[str]
backgroundSize: int
tpCount: int
foldEnrichment: float
pValue: float
oddsRatio: float
[docs]
def run_custom_enrichment(
exp: Experiment,
gene_ids: list[str],
gene_set_name: str,
) -> CustomEnrichmentResult:
"""Test enrichment of a custom gene set against the experiment results.
Computes overlap, fold enrichment, p-value (hypergeometric), and odds ratio.
"""
result_ids = exp.result_gene_ids()
tp_ids, _, fn_ids, tn_ids = exp.classification_id_sets()
gene_set = set(gene_ids)
overlap = result_ids & gene_set
tp_in_overlap = tp_ids & gene_set
background = len(result_ids) + len(fn_ids) + len(tn_ids)
background = max(background, 1)
result_size = max(len(result_ids), 1)
gene_set_size = max(len(gene_set), 1)
expected = result_size * gene_set_size / background
fold = len(overlap) / expected if expected > 0 else 0.0
a = len(overlap) # in result AND in gene set
b = gene_set_size - a # in gene set but NOT in result
c = result_size - a # in result but NOT in gene set
d = max(background - a - b - c, 0) # in neither
odds = (a * max(d, 1)) / (max(b, 1) * max(c, 1))
log_p = hypergeometric_log_sf(len(overlap), background, result_size, gene_set_size)
p_value = min(1.0, math.exp(log_p))
return {
"geneSetName": gene_set_name,
"geneSetSize": len(gene_set),
"overlapCount": len(overlap),
"overlapGenes": sorted(overlap),
"backgroundSize": background,
"tpCount": len(tp_in_overlap),
"foldEnrichment": round(fold, 4),
"pValue": p_value,
"oddsRatio": round(odds, 4),
}