Source code for veupath_chatbot.services.export.service

"""Export service — CSV/TSV/TXT generation + Redis temp storage."""

import base64
import csv
import io
import json
import re
from dataclasses import dataclass
from typing import Literal
from uuid import uuid4

from redis.asyncio import Redis

from veupath_chatbot.platform.logging import get_logger
from veupath_chatbot.services.experiment.types import Experiment
from veupath_chatbot.services.experiment.types.enrichment import EnrichmentResult
from veupath_chatbot.services.gene_sets.types import GeneSet

logger = get_logger(__name__)

EXPORT_TTL = 600  # 10 minutes
REDIS_PREFIX = "export:"


[docs] @dataclass(frozen=True, slots=True) class ExportResult: """Metadata returned after generating an export file.""" export_id: str filename: str content_type: str url: str size_bytes: int expires_in_seconds: int
def _sanitize_filename(name: str) -> str: """Strip non-alphanumeric chars from a name for use in filenames.""" return re.sub(r"[^a-zA-Z0-9_-]", "_", name)[:60]
class ExportService:
    """Generates downloadable files and stores them in Redis with a TTL.

    Every ``export_*`` method renders its input to bytes and hands them
    to :meth:`_store`, which writes a JSON envelope (filename, content
    type, base64-encoded data) under ``REDIS_PREFIX + <uuid>`` with an
    expiry of ``EXPORT_TTL`` seconds.
    """

    def __init__(self, redis: Redis) -> None:
        """Keep the async Redis client used to store and fetch exports."""
        self._redis = redis

    async def _store(
        self, content: bytes, filename: str, content_type: str
    ) -> ExportResult:
        """Store file bytes in Redis and return export metadata.

        Args:
            content: Raw bytes of the generated file.
            filename: Download filename recorded alongside the data.
            content_type: MIME type recorded alongside the data.

        Returns:
            Metadata for the stored export, including its download URL.
        """
        export_id = str(uuid4())
        key = f"{REDIS_PREFIX}{export_id}"
        # The bytes are base64-encoded so they fit in a JSON envelope.
        payload = json.dumps(
            {
                "filename": filename,
                "content_type": content_type,
                "data": base64.b64encode(content).decode("ascii"),
            }
        )
        await self._redis.set(key, payload.encode("utf-8"), ex=EXPORT_TTL)
        logger.info(
            "Export stored",
            export_id=export_id,
            filename=filename,
            size_bytes=len(content),
        )

        # Imported at call time, as before.
        # NOTE(review): presumably this avoids an import cycle — confirm.
        from veupath_chatbot.platform.context import request_base_url_ctx

        # Drop any trailing slash so the joined URL never contains "//api".
        base = str(request_base_url_ctx.get() or "").rstrip("/")
        return ExportResult(
            export_id=export_id,
            filename=filename,
            content_type=content_type,
            url=f"{base}/api/v1/exports/{export_id}",
            size_bytes=len(content),
            expires_in_seconds=EXPORT_TTL,
        )

    async def get_export(self, export_id: str) -> tuple[bytes, str, str] | None:
        """Retrieve a stored export.

        Returns:
            ``(content, filename, content_type)``, or ``None`` when no
            entry exists under the export's key.
        """
        raw = await self._redis.get(f"{REDIS_PREFIX}{export_id}")
        if raw is None:
            return None
        envelope = json.loads(raw)
        content = base64.b64decode(envelope["data"])
        return content, envelope["filename"], envelope["content_type"]

    @staticmethod
    def _render_delimited(
        header: list[str], rows: list[list[object]], delimiter: str = ","
    ) -> bytes:
        """Render a header row plus data rows as UTF-8 delimited text."""
        buf = io.StringIO()
        writer = csv.writer(buf, delimiter=delimiter)
        writer.writerow(header)
        writer.writerows(rows)
        return buf.getvalue().encode("utf-8")

    async def export_gene_set(
        self, gene_set: GeneSet, format: Literal["csv", "txt"]
    ) -> ExportResult:
        """Export a gene set as CSV or TXT.

        ``"txt"`` produces one gene ID per line with no header; any other
        value produces a single-column CSV headed ``gene_id``.
        """
        name_part = _sanitize_filename(gene_set.name or "gene_set")
        if format == "txt":
            content = "\n".join(gene_set.gene_ids).encode("utf-8")
            return await self._store(content, f"{name_part}.txt", "text/plain")

        rows: list[list[object]] = [[gid] for gid in gene_set.gene_ids]
        return await self._store(
            self._render_delimited(["gene_id"], rows),
            f"{name_part}.csv",
            "text/csv",
        )

    def _enrichment_rows(
        self, results: list[EnrichmentResult]
    ) -> tuple[list[str], list[list[object]]]:
        """Build header + data rows from enrichment results.

        One row is produced per term of each result, in input order; a
        term's genes are joined with ``";"`` into a single cell.
        """
        header = [
            "analysis_type",
            "term_id",
            "term_name",
            "gene_count",
            "background_count",
            "fold_enrichment",
            "odds_ratio",
            "p_value",
            "fdr",
            "bonferroni",
            "genes",
        ]
        rows: list[list[object]] = [
            [
                result.analysis_type,
                term.term_id,
                term.term_name,
                term.gene_count,
                term.background_count,
                term.fold_enrichment,
                term.odds_ratio,
                term.p_value,
                term.fdr,
                term.bonferroni,
                ";".join(term.genes),
            ]
            for result in results
            for term in result.terms
        ]
        return header, rows

    async def export_enrichment(
        self, results: list[EnrichmentResult], name: str
    ) -> ExportResult:
        """Export enrichment results as CSV."""
        name_part = _sanitize_filename(name or "enrichment")
        header, rows = self._enrichment_rows(results)
        return await self._store(
            self._render_delimited(header, rows),
            f"{name_part}_enrichment.csv",
            "text/csv",
        )

    async def export_enrichment_tsv(
        self, results: list[EnrichmentResult], name: str
    ) -> ExportResult:
        """Export enrichment results as TSV."""
        name_part = _sanitize_filename(name or "enrichment")
        header, rows = self._enrichment_rows(results)
        return await self._store(
            self._render_delimited(header, rows, delimiter="\t"),
            f"{name_part}_enrichment.tsv",
            "text/tab-separated-values",
        )

    async def export_enrichment_json(
        self, results: list[EnrichmentResult], name: str
    ) -> ExportResult:
        """Export enrichment results as JSON."""
        # Imported at call time, as before.
        from veupath_chatbot.services.experiment.types import to_json

        name_part = _sanitize_filename(name or "enrichment")
        serialized = [to_json(r) for r in results]
        content = json.dumps(serialized, indent=2).encode("utf-8")
        return await self._store(
            content, f"{name_part}_enrichment.json", "application/json"
        )

    async def export_json(self, data: object, name: str) -> ExportResult:
        """Export arbitrary data as JSON.

        Values the JSON encoder cannot handle natively are written as
        their ``str()`` form (``default=str``).
        """
        name_part = _sanitize_filename(name or "export")
        content = json.dumps(data, indent=2, default=str).encode("utf-8")
        return await self._store(content, f"{name_part}.json", "application/json")

    async def export_experiment_results(
        self, experiment: Experiment, format: Literal["csv", "tsv"]
    ) -> ExportResult:
        """Export experiment gene classifications as CSV or TSV.

        Rows are written in the order TP, FP, FN, TN; missing gene name,
        organism or product values become empty cells. Any ``format``
        other than ``"tsv"`` yields CSV.
        """
        name_part = _sanitize_filename(experiment.config.name or experiment.id)
        if format == "tsv":
            delimiter, ext, content_type = "\t", "tsv", "text/tab-separated-values"
        else:
            delimiter, ext, content_type = ",", "csv", "text/csv"

        classified = (
            ("TP", experiment.true_positive_genes),
            ("FP", experiment.false_positive_genes),
            ("FN", experiment.false_negative_genes),
            ("TN", experiment.true_negative_genes),
        )
        rows: list[list[object]] = []
        for label, genes in classified:
            rows.extend(
                [
                    gene.id,
                    gene.name or "",
                    gene.organism or "",
                    gene.product or "",
                    label,
                ]
                for gene in genes
            )

        header = ["gene_id", "gene_name", "organism", "product", "classification"]
        return await self._store(
            self._render_delimited(header, rows, delimiter=delimiter),
            f"{name_part}_results.{ext}",
            content_type,
        )