Source code for veupath_chatbot.services.experiment.seed.helpers

"""Shared parameter-building helpers for seed definitions.

Every VEuPathDB component site seed file needs to build WDK search parameter
dicts.  These helpers encode the common patterns — organism JSON encoding,
GO term searches, text searches, signal peptide, transmembrane domains, etc.

Each seed file may still define site-specific helpers locally.
"""

import json


def org(names: list[str]) -> str:
    """Serialize a list of organism names into a WDK JSON-array string.

    Args:
        names: Organism names to encode.

    Returns:
        The JSON text produced by ``json.dumps`` for *names*.
    """
    encoded = json.dumps(names)
    return encoded
def go_search_params(
    organism: str,
    go_id: str,
    *,
    evidence: list[str] | None = None,
    go_term_value: str | None = None,
) -> dict[str, str]:
    """Assemble the parameter dict for a GenesByGoTerm search.

    Args:
        organism: Organism full name (e.g. "Plasmodium falciparum 3D7").
        go_id: GO term identifier (e.g. "GO:0004672").
        evidence: Evidence code filter. ``None`` selects
            ``["Curated", "Computed"]``.
        go_term_value: Value for the ``go_term`` field. ``None`` falls back
            to *go_id*. GiardiaDB uses ``"N/A"`` here.

    Returns:
        Parameter names mapped to string values; the organism, evidence
        list and typeahead entries are JSON-encoded.
    """
    evidence_codes = ["Curated", "Computed"] if evidence is None else evidence
    term_field = go_id if go_term_value is None else go_term_value

    params: dict[str, str] = {}
    params["organism"] = org([organism])
    params["go_term_evidence"] = json.dumps(evidence_codes)
    params["go_term_slim"] = "No"
    params["go_typeahead"] = json.dumps([go_id])
    params["go_term"] = term_field
    return params
def text_search_params(
    organism: str,
    expression: str,
    *,
    fields: list[str] | None = None,
) -> dict[str, str]:
    """Assemble the parameter dict for a GenesByText search.

    Args:
        organism: Organism full name.
        expression: Free-text query (e.g. "kinase", "rhoptry").
        fields: Fields to search. ``None`` selects ``["product"]``.

    Returns:
        Parameter names mapped to string values; the organism and field
        lists are JSON-encoded.
    """
    search_fields = fields if fields is not None else ["product"]

    params: dict[str, str] = {}
    params["text_search_organism"] = org([organism])
    params["text_expression"] = expression
    params["document_type"] = "gene"
    params["text_fields"] = json.dumps(search_fields)
    return params
def signal_peptide_params(organism: str) -> dict[str, str]:
    """Assemble the parameter dict for a GenesWithSignalPeptide search.

    Args:
        organism: Organism name; it is wrapped in a one-element list and
            encoded with ``org``.

    Returns:
        A single-entry dict holding the encoded ``organism`` value.
    """
    encoded_organism = org([organism])
    return {"organism": encoded_organism}
def transmembrane_params(
    organism: str,
    min_tm: str,
    max_tm: str,
) -> dict[str, str]:
    """Assemble the parameter dict for a GenesByTransmembraneDomains search.

    Callers pass default min/max values appropriate to their site context.

    Args:
        organism: Organism name; encoded with ``org`` as a one-element list.
        min_tm: Value stored unchanged under ``min_tm``.
        max_tm: Value stored unchanged under ``max_tm``.

    Returns:
        Dict with the keys ``organism``, ``min_tm`` and ``max_tm``.
    """
    return dict(
        organism=org([organism]),
        min_tm=min_tm,
        max_tm=max_tm,
    )
def mol_weight_params(
    organism: str,
    min_mw: str,
    max_mw: str,
) -> dict[str, str]:
    """Assemble the parameter dict for a GenesByMolecularWeight search.

    Args:
        organism: Organism name; encoded with ``org`` as a one-element list.
        min_mw: Value stored unchanged under ``min_molecular_weight``.
        max_mw: Value stored unchanged under ``max_molecular_weight``.

    Returns:
        Dict with the keys ``organism``, ``min_molecular_weight`` and
        ``max_molecular_weight``.
    """
    params: dict[str, str] = {"organism": org([organism])}
    params["min_molecular_weight"] = min_mw
    params["max_molecular_weight"] = max_mw
    return params
def ec_search_params(
    organism: str,
    *,
    ec_number: str,
    ec_sources: list[str],
    ec_wildcard: str = "No",
) -> dict[str, str]:
    """Assemble the parameter dict for a GenesByEcNumber search.

    Args:
        organism: Organism full name.
        ec_number: EC number pattern (e.g. "2.7.11.1").
        ec_sources: Evidence sources list (e.g. ``["KEGG_Enzyme"]``);
            JSON-encoded into ``ec_source``.
        ec_wildcard: Wildcard flag. Defaults to ``"No"``.

    Returns:
        Dict with the keys ``organism``, ``ec_source``,
        ``ec_number_pattern`` and ``ec_wildcard``.
    """
    encoded_organism = org([organism])
    encoded_sources = json.dumps(ec_sources)
    return dict(
        organism=encoded_organism,
        ec_source=encoded_sources,
        ec_number_pattern=ec_number,
        ec_wildcard=ec_wildcard,
    )
def gene_type_params(
    organism: str,
    gene_type: str = "protein coding",
) -> dict[str, str]:
    """Assemble the parameter dict for a GenesByGeneType search.

    Args:
        organism: Organism name; encoded with ``org`` as a one-element list.
        gene_type: Gene type, JSON-encoded as a one-element array.
            Defaults to ``"protein coding"``.

    Returns:
        Dict with the keys ``organism``, ``geneType`` and
        ``includePseudogenes`` (always ``"No"``).
    """
    params: dict[str, str] = {}
    params["organism"] = org([organism])
    params["geneType"] = json.dumps([gene_type])
    params["includePseudogenes"] = "No"
    return params
def interpro_params(
    organism: str,
    database: str,
    typeahead: str,
) -> dict[str, str]:
    """Assemble the parameter dict for a GenesByInterproDomain search.

    Args:
        organism: Organism name; encoded with ``org`` as a one-element list.
        database: Value stored unchanged under ``domain_database``.
        typeahead: Value stored unchanged under ``domain_typeahead``.

    Returns:
        Dict with the keys ``organism``, ``domain_database``,
        ``domain_typeahead`` and ``domain_accession`` (always ``"*"``).
    """
    return dict(
        organism=org([organism]),
        domain_database=database,
        domain_typeahead=typeahead,
        domain_accession="*",
    )
def location_params(
    organism: str,
    chromosome: str,
    start: str,
    end: str,
) -> dict[str, str]:
    """Assemble the parameter dict for a GenesByLocation search.

    Args:
        organism: Organism name; encoded with ``org`` as a one-element list
            and stored under ``organismSinglePick``.
        chromosome: Value stored unchanged under ``chromosomeOptional``.
        start: Value stored unchanged under ``start_point``.
        end: Value stored unchanged under ``end_point``.

    Returns:
        Dict with the keys ``organismSinglePick``, ``chromosomeOptional``,
        ``sequenceId`` (always the empty string), ``start_point`` and
        ``end_point``.
    """
    params: dict[str, str] = {"organismSinglePick": org([organism])}
    params["chromosomeOptional"] = chromosome
    params["sequenceId"] = ""
    params["start_point"] = start
    params["end_point"] = end
    return params
def exon_count_params(
    organism: str,
    min_exons: str,
    max_exons: str,
) -> dict[str, str]:
    """Assemble the parameter dict for a GenesByExonCount search.

    Args:
        organism: Organism name; encoded with ``org`` as a one-element list.
        min_exons: Value stored unchanged under ``num_exons_gte``.
        max_exons: Value stored unchanged under ``num_exons_lte``.

    Returns:
        Dict with the keys ``organism``, ``scope`` (always ``"Gene"``),
        ``num_exons_gte`` and ``num_exons_lte``.
    """
    return dict(
        organism=org([organism]),
        scope="Gene",
        num_exons_gte=min_exons,
        num_exons_lte=max_exons,
    )
def taxon_params(organism: str) -> dict[str, str]:
    """Assemble the parameter dict for a GenesByTaxon search.

    Args:
        organism: Organism name; it is wrapped in a one-element list and
            encoded with ``org``.

    Returns:
        A single-entry dict holding the encoded ``organism`` value.
    """
    params: dict[str, str] = {}
    params["organism"] = org([organism])
    return params
def rnaseq_fc_params(
    *,
    dataset_url: str,
    profileset: str,
    direction: str,
    ref_samples: list[str],
    comp_samples: list[str],
    fold_change: str = "2",
    hard_floor: str,
    protein_coding: str = "yes",
    ref_op: str = "average1",
    comp_op: str = "average1",
) -> dict[str, str]:
    """Assemble the parameter dict for an RNA-Seq fold-change search.

    All arguments are keyword-only.

    Args:
        dataset_url: Value stored unchanged under ``dataset_url``.
        profileset: Value stored unchanged under ``profileset_generic``.
        direction: Value stored unchanged under ``regulated_dir``.
        ref_samples: Reference samples; JSON-encoded into
            ``samples_fc_ref_generic``.
        comp_samples: Comparison samples; JSON-encoded into
            ``samples_fc_comp_generic``.
        fold_change: Value for ``fold_change``. Defaults to ``"2"``.
        hard_floor: Value stored unchanged under ``hard_floor`` (required).
        protein_coding: Value for ``protein_coding_only``. Defaults to
            ``"yes"``.
        ref_op: Value for ``min_max_avg_ref``. Defaults to ``"average1"``.
        comp_op: Value for ``min_max_avg_comp``. Defaults to ``"average1"``.

    Returns:
        Dict of the ten search parameters, all values being strings.
    """
    reference_json = json.dumps(ref_samples)
    comparison_json = json.dumps(comp_samples)

    params: dict[str, str] = {}
    params["dataset_url"] = dataset_url
    params["profileset_generic"] = profileset
    params["regulated_dir"] = direction
    params["samples_fc_ref_generic"] = reference_json
    params["min_max_avg_ref"] = ref_op
    params["samples_fc_comp_generic"] = comparison_json
    params["min_max_avg_comp"] = comp_op
    params["fold_change"] = fold_change
    params["hard_floor"] = hard_floor
    params["protein_coding_only"] = protein_coding
    return params
def paralog_count_params(
    organism: str,
    min_p: str,
    max_p: str,
) -> dict[str, str]:
    """Assemble the parameter dict for a GenesByParalogCount search.

    Args:
        organism: Organism name; encoded with ``org`` as a one-element list.
        min_p: Value placed under ``"min"`` in the JSON range object.
        max_p: Value placed under ``"max"`` in the JSON range object.

    Returns:
        Dict with the keys ``organism`` and ``num_paralogs``, the latter
        being a JSON object string with ``min`` and ``max`` members.
    """
    encoded_organism = org([organism])
    paralog_range = {"min": min_p, "max": max_p}
    return {
        "organism": encoded_organism,
        "num_paralogs": json.dumps(paralog_range),
    }