# Source code for veupath_chatbot.services.experiment.report

"""Self-contained HTML report generation for experiments.

Generates a single-file HTML document with embedded styles, tables,
and inline SVG charts. No external dependencies required.
"""

import html
from datetime import UTC, datetime

from veupath_chatbot.services.experiment.types import (
    BootstrapResult,
    Experiment,
    RankMetrics,
    StepAnalysisResult,
)

# Stylesheet embedded verbatim inside the report's <style> element by
# generate_experiment_report, so the generated HTML needs no external CSS file.
# The .badge-green/-blue/-amber/-red classes are the ones selected per verdict
# in _step_analysis_section; .summary-grid/.summary-card style the rank-metric
# cards and .gene-list styles the collapsible gene-id boxes.
_CSS = """
body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; max-width: 900px; margin: 0 auto; padding: 24px; color: #1a1a2e; background: #fff; }
h1 { font-size: 1.5rem; margin-bottom: 4px; }
h2 { font-size: 1.1rem; margin-top: 2rem; border-bottom: 1px solid #e5e7eb; padding-bottom: 6px; }
h3 { font-size: 0.9rem; margin-top: 1rem; color: #6b7280; }
table { width: 100%; border-collapse: collapse; margin: 8px 0; font-size: 0.85rem; }
th, td { padding: 6px 12px; text-align: left; border-bottom: 1px solid #e5e7eb; }
th { background: #f9fafb; font-weight: 600; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.05em; color: #6b7280; }
td { font-family: 'SF Mono', Consolas, monospace; }
.numeric { text-align: right; }
.badge { display: inline-block; padding: 2px 8px; border-radius: 12px; font-size: 0.7rem; font-weight: 600; }
.badge-green { background: #dcfce7; color: #166534; }
.badge-blue { background: #dbeafe; color: #1e40af; }
.badge-amber { background: #fef3c7; color: #92400e; }
.badge-red { background: #fee2e2; color: #991b1b; }
.summary-grid { display: grid; grid-template-columns: repeat(3, 1fr); gap: 12px; margin: 12px 0; }
.summary-card { border: 1px solid #e5e7eb; border-radius: 8px; padding: 12px; text-align: center; }
.summary-card .label { font-size: 0.7rem; text-transform: uppercase; letter-spacing: 0.05em; color: #6b7280; }
.summary-card .value { font-size: 1.5rem; font-weight: 700; font-family: 'SF Mono', Consolas, monospace; margin: 4px 0; }
.gene-list { max-height: 200px; overflow-y: auto; border: 1px solid #e5e7eb; border-radius: 4px; padding: 8px; font-size: 0.75rem; font-family: monospace; }
details summary { cursor: pointer; font-size: 0.85rem; font-weight: 600; color: #374151; }
.meta { font-size: 0.8rem; color: #6b7280; }
.ci { font-size: 0.7rem; color: #9ca3af; }
@media print { body { max-width: 100%; } }
"""


def generate_experiment_report(experiment: Experiment) -> str:
    """Render an experiment as a single standalone HTML page.

    The header and configuration sections are always emitted first. The
    rank-metrics, classification, robustness, step-analysis and enrichment
    sections are each added only when the corresponding attribute of
    ``experiment`` is truthy. The gene-lists section and the footer always
    close the page. Sections are joined with newlines and wrapped in an
    HTML5 skeleton that inlines the module stylesheet.

    :param experiment: Full experiment object with results.
    :returns: Complete HTML string.
    """
    sections: list[str] = [_header(experiment), _config_section(experiment)]

    rank_metrics = experiment.rank_metrics
    if rank_metrics:
        sections.append(_rank_metrics_section(rank_metrics))

    if experiment.metrics:
        sections.append(_classification_section(experiment))

    robustness = experiment.robustness
    if robustness:
        sections.append(_robustness_section(robustness))

    step_analysis = experiment.step_analysis
    if step_analysis:
        sections.append(_step_analysis_section(step_analysis))

    if experiment.enrichment_results:
        sections.append(_enrichment_section(experiment))

    sections.append(_gene_lists_section(experiment))
    sections.append(_footer(experiment))

    body = "\n".join(sections)
    # The page title falls back to the experiment id when no name is configured.
    page_title = _esc(experiment.config.name or experiment.id)

    return f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Experiment Report — {page_title}</title>
<style>{_CSS}</style>
</head>
<body>
{body}
</body>
</html>"""
def _esc(text: str) -> str:
    """Return ``text`` coerced to ``str`` and HTML-escaped.

    :param text: Value to embed in the HTML output.
    :returns: The escaped string produced by :func:`html.escape`.
    """
    as_text = str(text)
    return html.escape(as_text)


def _pct(v: float) -> str:
    """Format a fraction as a percentage with one decimal place.

    :param v: Fraction to format (``0.5`` becomes ``"50.0%"``).
    :returns: Percentage string with a trailing ``%``.
    """
    return format(v * 100, ".1f") + "%"


def _num(v: float) -> str:
    """Format a number in fixed-point notation with four decimal places.

    :param v: Number to format.
    :returns: The formatted string.
    """
    return format(v, ".4f")


def _header(exp: Experiment) -> str:
    """Build the report title and the one-line metadata paragraph.

    The title falls back to ``"Experiment <id>"`` when the config has no
    name, and the timestamp falls back to the current UTC time when
    ``created_at`` is falsy.

    :param exp: Experiment being reported.
    :returns: HTML fragment with the ``<h1>`` and ``<p class="meta">``.
    """
    cfg = exp.config
    title = cfg.name or f"Experiment {exp.id}"
    stamp = exp.created_at or datetime.now(UTC).isoformat()
    return f"""
<h1>{_esc(title)}</h1>
<p class="meta">
  Mode: <strong>{_esc(cfg.mode)}</strong> &middot;
  Site: <strong>{_esc(cfg.site_id)}</strong> &middot;
  Record type: <strong>{_esc(cfg.record_type)}</strong> &middot;
  Generated: {_esc(stamp)}
</p>"""


def _config_section(exp: Experiment) -> str:
    """Build the "Configuration" key/value table.

    The control-set id and total run time rows are included only when the
    respective value is truthy.

    :param exp: Experiment being reported.
    :returns: HTML fragment with the section heading and table.
    """
    cfg = exp.config
    pairs = [
        ("Search", cfg.search_name),
        ("Positive controls", str(len(cfg.positive_controls))),
        ("Negative controls", str(len(cfg.negative_controls))),
    ]
    if cfg.control_set_id:
        pairs.append(("Control set ID", cfg.control_set_id))
    if exp.total_time_seconds:
        pairs.append(("Total time", f"{exp.total_time_seconds:.1f}s"))

    html_rows = [
        f"<tr><td>{_esc(label)}</td><td>{_esc(value)}</td></tr>"
        for label, value in pairs
    ]
    table_body = "\n".join(html_rows)
    return f"<h2>Configuration</h2>\n<table>{table_body}</table>"


def _rank_metrics_section(rm: RankMetrics) -> str:
    """Build the "Rank-Based Metrics" section.

    Emits three summary cards for K=50 followed by a table with one row per
    K present in ``rm.precision_at_k`` (ascending). Values missing for a
    given K are rendered as 0.

    :param rm: Rank metrics keyed by K.
    :returns: HTML fragment with heading, summary cards and table.
    """
    precision = rm.precision_at_k
    recall = rm.recall_at_k
    enrichment = rm.enrichment_at_k

    head_row = (
        "<tr><th>K</th>"
        "<th class='numeric'>Precision@K</th>"
        "<th class='numeric'>Recall@K</th>"
        "<th class='numeric'>Enrichment@K</th></tr>"
    )
    body_rows = [
        f"<tr><td>{k}</td>"
        f"<td class='numeric'>{_pct(precision.get(k, 0))}</td>"
        f"<td class='numeric'>{_pct(recall.get(k, 0))}</td>"
        f"<td class='numeric'>{_num(enrichment.get(k, 0))}x</td></tr>"
        for k in sorted(precision)
    ]

    # Headline cards always report the K=50 cut-off.
    precision_50 = precision.get(50, 0)
    recall_50 = recall.get(50, 0)
    enrichment_50 = enrichment.get(50, 0)
    cards = f"""
<div class="summary-grid">
  <div class="summary-card"><div class="label">Precision@50</div><div class="value">{_pct(precision_50)}</div></div>
  <div class="summary-card"><div class="label">Recall@50</div><div class="value">{_pct(recall_50)}</div></div>
  <div class="summary-card"><div class="label">Enrichment@50</div><div class="value">{_num(enrichment_50)}x</div></div>
</div>"""

    table_rows = "".join(body_rows)
    return f"<h2>Rank-Based Metrics</h2>\n{cards}\n<table>{head_row}\n{table_rows}</table>"


def _classification_section(exp: Experiment) -> str:
    """Build the "Classification Metrics" table and confusion matrix.

    :param exp: Experiment being reported.
    :returns: HTML fragment, or ``""`` when ``exp.metrics`` is falsy.
    """
    metrics = exp.metrics
    if not metrics:
        return ""

    matrix = metrics.confusion_matrix
    pairs = [
        ("Sensitivity", _pct(metrics.sensitivity)),
        ("Specificity", _pct(metrics.specificity)),
        ("Precision", _pct(metrics.precision)),
        ("F1 Score", _num(metrics.f1_score)),
        ("MCC", _num(metrics.mcc)),
        ("Balanced Accuracy", _pct(metrics.balanced_accuracy)),
    ]
    html_rows = [
        f"<tr><td>{_esc(label)}</td><td class='numeric'>{_esc(value)}</td></tr>"
        for label, value in pairs
    ]
    table_body = "\n".join(html_rows)

    matrix_html = f"""
<h3>Confusion Matrix</h3>
<table>
  <tr><th></th><th class='numeric'>Predicted +</th><th class='numeric'>Predicted -</th></tr>
  <tr><td>Actual +</td><td class='numeric'>{matrix.true_positives}</td><td class='numeric'>{matrix.false_negatives}</td></tr>
  <tr><td>Actual -</td><td class='numeric'>{matrix.false_positives}</td><td class='numeric'>{matrix.true_negatives}</td></tr>
</table>"""

    return f"<h2>Classification Metrics</h2>\n<table>{table_body}</table>\n{matrix_html}"


def _robustness_section(br: BootstrapResult) -> str:
    """Build the "Robustness & Uncertainty" section.

    Lists mean, interval bounds and standard deviation for every entry of
    ``br.metric_cis`` and ``br.rank_metric_cis``; on a key collision the
    rank-metric entry replaces the classification-metric one.

    :param br: Bootstrap result with confidence intervals.
    :returns: HTML fragment with heading, stability line and table.
    """
    intervals = {**br.metric_cis, **br.rank_metric_cis}
    body_rows = [
        f"<tr><td>{_esc(metric)}</td>"
        f"<td class='numeric'>{_num(interval.mean)}</td>"
        f"<td class='numeric ci'>[{_num(interval.lower)}, {_num(interval.upper)}]</td>"
        f"<td class='numeric'>{_num(interval.std)}</td></tr>"
        for metric, interval in intervals.items()
    ]
    head_row = (
        "<tr><th>Metric</th>"
        "<th class='numeric'>Mean</th>"
        "<th class='numeric'>95% CI</th>"
        "<th class='numeric'>Std</th></tr>"
    )
    stability_line = (
        f"<p>Top-50 Stability (Jaccard): <strong>{_num(br.top_k_stability)}</strong> "
        f"({br.n_iterations} bootstrap iterations)</p>"
    )
    table_rows = "".join(body_rows)
    return (
        f"<h2>Robustness &amp; Uncertainty</h2>\n{stability_line}\n"
        f"<table>{head_row}\n{table_rows}</table>"
    )


def _step_analysis_section(sa: StepAnalysisResult) -> str:
    """Build the "Step Analysis" section.

    Adds a per-step evaluation table when ``sa.step_evaluations`` is
    non-empty and a step-contribution table when ``sa.step_contributions``
    is non-empty. The heading is emitted even if both are empty.

    :param sa: Step analysis result.
    :returns: HTML fragment with the heading and any tables.
    """
    chunks: list[str] = ["<h2>Step Analysis</h2>"]

    if sa.step_evaluations:
        eval_head = (
            "<tr><th>Step</th>"
            "<th class='numeric'>Results</th>"
            "<th class='numeric'>Recall</th>"
            "<th class='numeric'>FPR</th>"
            "<th class='numeric'>TP Δ</th>"
            "<th class='numeric'>FP Δ</th></tr>"
        )
        eval_rows = "".join(
            [
                f"<tr><td>{_esc(step.display_name)}</td>"
                f"<td class='numeric'>{step.result_count}</td>"
                f"<td class='numeric'>{_pct(step.recall)}</td>"
                f"<td class='numeric'>{_pct(step.false_positive_rate)}</td>"
                f"<td class='numeric'>{step.tp_movement:+d}</td>"
                f"<td class='numeric'>{step.fp_movement:+d}</td></tr>"
                for step in sa.step_evaluations
            ]
        )
        chunks.append(
            f"<h3>Per-Step Evaluation</h3>\n<table>{eval_head}\n{eval_rows}</table>"
        )

    if sa.step_contributions:
        contrib_head = (
            "<tr><th>Step</th>"
            "<th class='numeric'>Recall Δ</th>"
            "<th class='numeric'>FPR Δ</th>"
            "<th>Verdict</th>"
            "<th>Narrative</th></tr>"
        )
        # Verdicts without a mapping get no colour class (empty string).
        verdict_badges = {
            "essential": "badge-green",
            "helpful": "badge-blue",
            "neutral": "badge-amber",
            "harmful": "badge-red",
        }
        contrib_rows = "".join(
            [
                f"<tr><td>{_esc(item.search_name)}</td>"
                f"<td class='numeric'>{item.recall_delta:+.1%}</td>"
                f"<td class='numeric'>{item.fpr_delta:+.1%}</td>"
                f"<td><span class='badge {verdict_badges.get(item.verdict, '')}'>"
                f"{_esc(item.verdict)}</span></td>"
                f"<td>{_esc(item.narrative)}</td></tr>"
                for item in sa.step_contributions
            ]
        )
        chunks.append(
            f"<h3>Step Contribution</h3>\n<table>{contrib_head}\n{contrib_rows}</table>"
        )

    return "\n".join(chunks)


def _enrichment_section(exp: Experiment) -> str:
    """Build the "Enrichment Analysis" section.

    Emits one sub-heading and table per entry of
    ``exp.enrichment_results``; each table lists at most the first 20 terms.

    :param exp: Experiment being reported.
    :returns: HTML fragment with the heading and one table per analysis.
    """
    head_row = (
        "<tr><th>Term</th>"
        "<th class='numeric'>Genes</th>"
        "<th class='numeric'>Fold</th>"
        "<th class='numeric'>FDR</th></tr>"
    )
    chunks: list[str] = ["<h2>Enrichment Analysis</h2>"]
    for result in exp.enrichment_results:
        term_rows = "".join(
            [
                f"<tr><td>{_esc(term.term_name)}</td>"
                f"<td class='numeric'>{term.gene_count}</td>"
                f"<td class='numeric'>{term.fold_enrichment:.2f}</td>"
                f"<td class='numeric'>{term.fdr:.2e}</td></tr>"
                for term in result.terms[:20]
            ]
        )
        chunks.append(
            f"<h3>{_esc(result.analysis_type)} ({result.total_genes_analyzed} genes)</h3>\n"
            f"<table>{head_row}\n{term_rows}</table>"
        )
    return "\n".join(chunks)


def _gene_lists_section(exp: Experiment) -> str:
    """Build the collapsible "Gene Lists" section.

    For each non-empty list (true positives, false negatives, false
    positives) a ``<details>`` element is emitted whose summary shows the
    full list length and whose body shows at most the first 200 gene ids.

    :param exp: Experiment being reported.
    :returns: HTML fragment with the heading and any gene lists.
    """
    labelled_lists = (
        ("True Positives", exp.true_positive_genes),
        ("False Negatives", exp.false_negative_genes),
        ("False Positives", exp.false_positive_genes),
    )
    chunks: list[str] = ["<h2>Gene Lists</h2>"]
    for label, genes in labelled_lists:
        if genes:
            shown_ids = ", ".join([gene.id for gene in genes[:200]])
            chunks.append(
                f"<details><summary>{label} ({len(genes)})</summary>"
                f"<div class='gene-list'>{_esc(shown_ids)}</div></details>"
            )
    return "\n".join(chunks)


def _footer(exp: Experiment) -> str:
    """Build the closing rule and attribution line.

    :param exp: Experiment being reported.
    :returns: HTML fragment with an ``<hr>`` and the experiment id.
    """
    experiment_id = _esc(exp.id)
    return f"""
<hr style="margin-top: 2rem; border: none; border-top: 1px solid #e5e7eb;">
<p class="meta" style="text-align: center; margin-top: 12px;">
  Experiment ID: {experiment_id} &middot; Generated by Pathfinder Experiment Lab
</p>"""