Source code for veupath_chatbot.services.experiment._deserialize

"""Deserialize JSON dicts back into Experiment dataclass trees.

Simple sub-types are deserialized via the generic ``from_json`` converter.
Only ``Experiment`` / ``ExperimentConfig`` require hand-written logic due
to conditional field defaults and enrichment deduplication.
"""

from typing import Any, cast

from veupath_chatbot.services.experiment.types import (
    BootstrapResult,
    ControlValueFormat,
    CrossValidationResult,
    EnrichmentResult,
    Experiment,
    ExperimentConfig,
    ExperimentMetrics,
    GeneInfo,
    OperatorKnob,
    OptimizationSpec,
    RankMetrics,
    StepAnalysisResult,
    ThresholdKnob,
    TreeOptimizationResult,
)
from veupath_chatbot.services.experiment.types.core import DEFAULT_STEP_ANALYSIS_PHASES
from veupath_chatbot.services.experiment.types.json_codec import from_json



[docs]
def experiment_from_json(d: dict[str, Any]) -> Experiment:
    """Reconstruct an :class:`Experiment` from its JSON representation.

    The config sub-dict is mapped field by field from camelCase keys onto
    :class:`ExperimentConfig`, applying per-field defaults for missing keys.
    Nested result objects are hydrated through the generic ``from_json``
    converter.  Enrichment results are deduplicated by ``analysis_type``.

    List-valued keys that are iterated here (knobs, enrichment results and
    the four gene lists) tolerate an explicit ``null`` as well as a missing
    key; both are treated as an empty list.

    :param d: Dict produced by :func:`experiment_to_json`.
    :returns: Fully hydrated Experiment dataclass.
    :raises KeyError: If ``d`` lacks ``"id"`` or ``"config"``, or the config
        lacks ``"siteId"`` or ``"recordType"``.
    """
    cfg = d["config"]

    # Only a non-empty list is converted; anything else leaves the specs unset.
    opt_specs = None
    raw_specs = cfg.get("optimizationSpecs")
    if raw_specs and isinstance(raw_specs, list):
        opt_specs = [from_json(s, OptimizationSpec) for s in raw_specs]

    # ``dict.get(key, default)`` only falls back when the key is *absent*; a
    # stored ``null`` comes back as ``None`` and cannot be iterated, hence the
    # ``or []`` guard.  An empty result is normalised to ``None`` below.
    threshold_knobs = [
        from_json(k, ThresholdKnob) for k in (cfg.get("thresholdKnobs") or [])
    ]
    operator_knobs = [
        from_json(k, OperatorKnob) for k in (cfg.get("operatorKnobs") or [])
    ]

    config = ExperimentConfig(
        site_id=cfg["siteId"],
        record_type=cfg["recordType"],
        search_name=cfg.get("searchName", ""),
        parameters=cfg.get("parameters", {}),
        positive_controls=cfg.get("positiveControls", []),
        negative_controls=cfg.get("negativeControls", []),
        controls_search_name=cfg.get("controlsSearchName", ""),
        controls_param_name=cfg.get("controlsParamName", ""),
        controls_value_format=cast(
            ControlValueFormat, cfg.get("controlsValueFormat", "newline")
        ),
        enable_cross_validation=cfg.get("enableCrossValidation", False),
        k_folds=cfg.get("kFolds", 5),
        enrichment_types=cfg.get("enrichmentTypes", []),
        name=cfg.get("name", ""),
        description=cfg.get("description", ""),
        mode=cfg.get("mode", "single"),
        step_tree=cfg.get("stepTree"),
        source_strategy_id=cfg.get("sourceStrategyId"),
        optimization_target_step=cfg.get("optimizationTargetStep"),
        optimization_specs=opt_specs,
        optimization_budget=cfg.get("optimizationBudget", 30),
        optimization_objective=cfg.get("optimizationObjective", "balanced_accuracy"),
        parameter_display_values=cfg.get("parameterDisplayValues"),
        enable_step_analysis=cfg.get("enableStepAnalysis", False),
        # Copy the default so the shared constant is never handed out directly.
        step_analysis_phases=cfg.get(
            "stepAnalysisPhases",
            list(DEFAULT_STEP_ANALYSIS_PHASES),
        ),
        control_set_id=cfg.get("controlSetId"),
        threshold_knobs=threshold_knobs or None,
        operator_knobs=operator_knobs or None,
        tree_optimization_objective=cfg.get(
            "treeOptimizationObjective", "precision_at_50"
        ),
        tree_optimization_budget=cfg.get("treeOptimizationBudget", 50),
        max_list_size=cfg.get("maxListSize"),
        sort_attribute=cfg.get("sortAttribute"),
        sort_direction=cfg.get("sortDirection", "ASC"),
        parent_experiment_id=cfg.get("parentExperimentId"),
    )

    exp = Experiment(
        id=d["id"],
        config=config,
        user_id=d.get("userId"),
        status=d.get("status", "completed"),
        created_at=d.get("createdAt", ""),
        completed_at=d.get("completedAt"),
    )

    if d.get("metrics"):
        exp.metrics = from_json(d["metrics"], ExperimentMetrics)
    if d.get("crossValidation"):
        exp.cross_validation = from_json(d["crossValidation"], CrossValidationResult)

    # Deduplicate enrichment results by analysis_type, keeping the last
    # (most recent) entry.  This cleans up data persisted before the
    # upsert_enrichment_result helper was introduced.
    # ``or []`` guards against an explicit ``null`` in the payload.
    all_results = [
        from_json(er, EnrichmentResult) for er in (d.get("enrichmentResults") or [])
    ]
    # Later duplicates overwrite earlier indices, so each type maps to the
    # position of its final occurrence.
    last_index_by_type: dict[str, int] = {
        er.analysis_type: i for i, er in enumerate(all_results)
    }
    # Sorting the surviving indices preserves the original relative order.
    exp.enrichment_results = [
        all_results[i] for i in sorted(last_index_by_type.values())
    ]

    for attr, key in (
        ("true_positive_genes", "truePositiveGenes"),
        ("false_negative_genes", "falseNegativeGenes"),
        ("false_positive_genes", "falsePositiveGenes"),
        ("true_negative_genes", "trueNegativeGenes"),
    ):
        # Missing and ``null`` gene lists both become an empty list.
        setattr(exp, attr, [from_json(g, GeneInfo) for g in (d.get(key) or [])])

    exp.error = d.get("error")
    exp.total_time_seconds = d.get("totalTimeSeconds")
    exp.batch_id = d.get("batchId")
    exp.benchmark_id = d.get("benchmarkId")
    exp.control_set_label = d.get("controlSetLabel")
    exp.is_primary_benchmark = d.get("isPrimaryBenchmark", False)
    exp.optimization_result = d.get("optimizationResult")
    exp.wdk_strategy_id = d.get("wdkStrategyId")
    exp.wdk_step_id = d.get("wdkStepId")
    exp.notes = d.get("notes")

    # Optional nested results: assigned only when present and non-empty, so
    # the dataclass defaults stay in place otherwise.
    for attr, key, cls in (
        ("step_analysis", "stepAnalysis", StepAnalysisResult),
        ("rank_metrics", "rankMetrics", RankMetrics),
        ("robustness", "robustness", BootstrapResult),
        ("tree_optimization", "treeOptimization", TreeOptimizationResult),
    ):
        raw = d.get(key)
        if raw:
            setattr(exp, attr, from_json(raw, cls))

    return exp