Source code for veupath_chatbot.services.experiment.stats

"""Shared statistical utilities for experiment analysis."""

import math


[docs] def hypergeometric_log_sf(x: int, n: int, k: int, m: int) -> float: """Approximate log survival function for hypergeometric distribution. Uses a normal approximation of P(X >= x) for speed. Returns 0.0 (i.e. p=1.0) when the observed count is at or below the mean. Parameters ---------- x: Number of observed successes. n: Population size (background). k: Number of success states in the population (result set size). m: Number of draws (gene set size). """ mean = k * m / n var = k * m * (n - k) * (n - m) / (n * n * max(n - 1, 1)) std = max(math.sqrt(var), 1e-12) z = (x - mean) / std if z <= 0: return 0.0 # no enrichment -> p = 1.0 log_sf = math.log(max(math.erfc(z / math.sqrt(2)) / 2, 1e-300)) return log_sf