# Readout / assay catalog for the ASSAY_TO_FUNCTION project.
#
# Each entry describes a class of experimental readout used to infer gene
# function. The central hypothesis of this project: the reliability of the
# inference "perturb gene G -> readout R moves -> G is involved in process P"
# depends on two axes:
#
#   proximity:   molecular   = measures the gene product's own activity
#                phenotypic  = measures a downstream cellular consequence
#   convergence: low  = readout is a fairly specific signature of process P
#                high = readout is a hub many upstream inputs feed into
#
# Over-annotation risk is highest for phenotypic + high-convergence readouts.
#
# `patterns` are case-insensitive regular expressions matched against the
# review evidence text (summary + reason + supporting_text). Probe/instrument
# names are preferred over generic process words to avoid matching a GO term
# label to itself.

readouts:

  # ---- Phenotypic, high-convergence "hub" readouts (over-annotation traps) ----

  # UPR / ER-stress hub readout. The patterns key on reporter-element and
  # reporter-construct names rather than on the process words that appear in
  # aligned_label_regex.
  UPR_ER_STRESS:
    reports_state_of: unfolded protein response / ER stress
    proximity: phenotypic
    convergence: high
    aligned_label_regex: 'unfolded protein|ER stress|endoplasmic reticulum stress|reticulum unfolded'
    commonly_overmapped_to:
      - GO:0006986   # response to unfolded protein
      - GO:0030968   # endoplasmic reticulum unfolded protein response
      - GO:0034976   # response to endoplasmic reticulum stress
    patterns:
      - '\bUPRE\b'
      # Needed in addition to '\bUPRE\b': in "5xUPRE" there is no word boundary
      # between "x" and "U". Accepts ASCII "x" or the "×" sign, optional spaces.
      - '5\s*[x×]\s*UPRE'
      - '\bERSE\b'   # ER stress element; left boundary required (cf. "diverse")
      - 'XBP1\s*(splic|s\b|-luc)'   # "XBP1 splicing", "XBP1s" (token must end after s), "XBP1-luc"
      - 'ERAI\b'   # NOTE(review): right boundary only, unlike UPRE/ERSE -- confirm intended
      - '(BiP|GRP78|CHOP|ATF4|ATF6)\s*[- ]?(reporter|luciferase|promoter)'   # gene name must be followed by a construct word

  # ROS / oxidative-stress hub readout, matched on probe and biosensor names.
  OXIDATIVE_STRESS_ROS:
    reports_state_of: reactive oxygen species / oxidative stress
    proximity: phenotypic
    convergence: high
    aligned_label_regex: 'oxidative stress|reactive oxygen|response to .*oxidant|hydrogen peroxide'
    commonly_overmapped_to:
      - GO:0006979   # response to oxidative stress
      - GO:0034599   # cellular response to oxidative stress
    patterns:
      - 'CellROX'
      # Dichlorofluorescein-diacetate family. The H/H2 prefix is optional so
      # that DCFDA, DCF-DA and DCFH-DA match as well as H2DCFDA / CM-H2DCFDA
      # (a pattern with a mandatory leading "H" misses the first three).
      - '(H2?-?)?DCFH?-?DA'
      - '\bDCFH?\b'   # bare DCF or DCFH as a standalone token
      - 'MitoSOX'
      - 'dihydroethidium'
      - '\bDHE\b'
      - 'roGFP'
      - '(?-i:HyPer)\b'   # H2O2 biosensor; case-sensitive (cf. "hyper-activation")
      - 'amplex\s*red'

  # Apoptosis hub readout, matched on assay / probe names (caspase activity or
  # cleavage, Annexin V, TUNEL, PARP cleavage, DEVD).
  APOPTOSIS_CASPASE:
    reports_state_of: caspase activation / apoptosis
    proximity: phenotypic
    convergence: high
    aligned_label_regex: 'apopto|programmed cell death|caspase'
    commonly_overmapped_to:
      - GO:0006915   # apoptotic process
      - GO:0043065   # positive regulation of apoptotic process
      - GO:0097190   # apoptotic signaling pathway
    patterns:
      - 'CellEvent'
      - 'caspase[- ]?3/?7?\s*(activ|assay|cleav)'   # "caspase-3", "caspase 3/7"; an activity/assay/cleavage word must follow
      - 'cleaved\s+caspase'
      - 'Annexin\s*V'
      - 'TUNEL'
      - 'PARP\s*cleav'   # stem match: "PARP cleavage", "PARP cleaved"
      - '\bDEVD'   # left boundary only, so suffixed tokens starting with DEVD also match

  # The extreme phenotypic + high-convergence quadrant: a whole-animal behaviour
  # integrates the entire nervous system plus development, metabolism and basic
  # cell biology, so almost any perturbation can move it. A behavioural readout
  # therefore licenses, at most, a cautious non-core BP term and never an MF or
  # core call. Worse, the assay *modality* is easily mistaken for the function:
  # the Morris Water Maze is a swimming-based test of spatial memory, so a gene
  # measured in it (e.g. Casp3 as an apoptosis marker) can be mis-annotated to
  # `swimming behavior`. See the BEHAVIOR project for the corpus-wide analysis.
  BEHAVIORAL_ASSAY:
    # Patterns below are named behavioural tests / apparatus; generic words such
    # as "behavior" or "memory" appear only in aligned_label_regex.
    reports_state_of: whole-animal behaviour (locomotion, learning/memory, anxiety, feeding, ...)
    proximity: phenotypic
    convergence: high
    aligned_label_regex: 'behavior|behaviour|locomot|learning|memory|cognit|swimming behavior|feeding behavior|exploration'
    commonly_overmapped_to:
      - GO:0007610   # behavior
      - GO:0007612   # learning
      - GO:0007626   # locomotory behavior
      - GO:0008344   # adult locomotory behavior
      - GO:0036269   # swimming behavior
      - GO:0007631   # feeding behavior
    patterns:
      - 'morris\s*water\s*maze'
      - '\bMWM\b'   # abbreviation, bounded on both sides
      - 'open[-\s]*field\s*(test|assay|arena|apparatus)'   # "open field" alone is not enough; a test/apparatus word must follow
      - 'rota-?rod'   # "rotarod" or "rota-rod"
      - 'forced\s*swim'
      - 'tail\s*suspension'
      - 'elevated\s*plus[-\s]*maze'
      - 'fear\s*conditioning'
      - 'novel\s*object\s*recognition'
      - 'Barnes\s*maze'
      - '\bY-?maze\b'   # "Y-maze" or "Ymaze"
      - 'light[-/]dark\s*(box|test)'   # separator must be "-" or "/" (a plain space does not match)
      - '(acoustic\s*)?startle\s*(response|reflex)'   # the "acoustic" prefix is optional
      - 'beam[-\s]*(walk|balance)'
      - 'grip\s*strength'

  # Autophagic-flux hub readout, matched on LC3 reporter / marker names.
  AUTOPHAGY_FLUX:
    reports_state_of: autophagic flux
    proximity: phenotypic
    convergence: high
    aligned_label_regex: 'autophag|mitophagy'
    commonly_overmapped_to:
      - GO:0006914   # autophagy
      - GO:0010506   # regulation of autophagy
    patterns:
      - 'GFP[- ]?LC3'
      - 'mCherry[- ]?GFP[- ]?LC3'   # tandem reporter; any text matching this also matches 'GFP[- ]?LC3' above
      - 'LC3[- ]?II'   # "LC3-II", "LC3 II", "LC3II"
      - 'autophag(ic|y)\s*flux'
      - 'p62/?SQSTM1'   # only the joined forms "p62/SQSTM1" / "p62SQSTM1"; bare p62 or SQSTM1 does not match

  # Mitochondrial membrane-potential hub readout, matched mostly on dye names.
  MITO_MEMBRANE_POTENTIAL:
    reports_state_of: mitochondrial membrane potential / health
    proximity: phenotypic
    convergence: high
    aligned_label_regex: 'mitochond'   # stem match: any label containing "mitochond"
    commonly_overmapped_to:
      - GO:0007005   # mitochondrion organization
    patterns:
      - '\bTMRM\b'
      - '\bTMRE\b'
      - '\bJC-1\b'
      - 'mitochondrial\s+membrane\s+potential'   # the only non-probe (phrase) pattern in this entry
      - '\bMitoTracker'   # left boundary only, so a directly appended suffix still matches

  # Intracellular-calcium hub readout, matched on indicator names and
  # imaging / flux phrases.
  CALCIUM_FLUX:
    reports_state_of: intracellular calcium
    proximity: phenotypic
    convergence: high
    # The second alternative is already covered by the first: any text that
    # contains "calcium-mediated" also contains "calcium".
    aligned_label_regex: 'calcium|\bcalcium-mediated'
    commonly_overmapped_to:
      - GO:0019722   # calcium-mediated signaling
    patterns:
      - 'Fluo-?4'   # "Fluo-4" or "Fluo4"
      - 'Fura-?2'   # "Fura-2" or "Fura2"
      - '\bGCaMP'   # left boundary only, so numbered variants (GCaMP followed by digits/letters) match
      - 'calcium\s+imaging'
      - 'Ca2\+\s*(flux|transient|imaging)'   # "\+" is a literal plus sign

  # pH hub readout. All patterns are probe names; the word "pH" itself is only
  # used in aligned_label_regex.
  pH_PROBE:
    reports_state_of: intracellular / compartment pH
    proximity: phenotypic
    convergence: high
    aligned_label_regex: '\bpH\b|pH reduction|pH elevation|acidif'
    commonly_overmapped_to:
      - GO:0045851   # pH reduction
    patterns:
      - 'pHrodo'
      - 'pHluorin'
      - '\bSNARF\b'   # abbreviation, bounded on both sides
      - '\bBCECF\b'   # abbreviation, bounded on both sides

  # Labile-iron hub readout, matched on iron-probe names and quench assays.
  IRON_PROBE:
    reports_state_of: labile iron pool
    proximity: phenotypic
    convergence: high
    aligned_label_regex: 'iron ion|\biron\b|ferrous|ferric|ferroptosis'
    commonly_overmapped_to:
      - GO:0006879   # intracellular iron ion homeostasis
    patterns:
      - 'FeRhoNox'
      - 'labile\s+iron'
      # Calcein counts as an iron readout only in a quenching context. A bare
      # "calcein-AM" would also fire on "calcein-AM release", which is the
      # pattern CYTOTOXICITY_KILLING uses for target-cell lysis.
      - 'calcein\s*(-?AM\s+)?(fluorescence\s+)?quench'

  # Generic transcriptional-reporter hub (luciferase / promoter-reporter assays).
  TRANSCRIPTIONAL_REPORTER:
    reports_state_of: pathway transcriptional output
    proximity: phenotypic
    convergence: high
    aligned_label_regex: 'transcription|gene expression|DNA-templated|DNA-binding transcription'
    commonly_overmapped_to:
      - GO:0006355   # regulation of DNA-templated transcription
    patterns:
      - 'luciferase\s*(reporter|assay|activity)'
      - 'reporter\s*assay'
      - 'promoter[- ]?reporter'
      # Response-element acronyms are matched case-sensitively: under the
      # case-insensitive default "ARE" hits the word "are" ("these are reporter
      # constructs") and "CRE" hits Cre-recombinase reporters ("Cre reporter mice").
      - '(NF-?[κk]B|(?-i:ARE|HSE|SRE|CRE))\s*[- ]?(reporter|luciferase)'

  # Viability / proliferation hub readout, matched on dye, kit and
  # incorporation-label names.
  VIABILITY_PROLIFERATION:
    reports_state_of: cell viability / proliferation
    proximity: phenotypic
    convergence: high
    aligned_label_regex: 'proliferat|cell population|cell growth|cell division|cell cycle'
    commonly_overmapped_to:
      - GO:0008283   # cell population proliferation
      - GO:0042127   # regulation of cell population proliferation
    patterns:
      - '\bMTT\b'
      # Matching is case-insensitive, so it is the required context word, not
      # letter case, that separates the dye from "MTs".
      - '\bMTS\s+(assay|reagent|reduction)'   # MTS dye (cf. "MTs"=microtubules)
      - 'CCK-?8'   # "CCK-8" or "CCK8"
      - 'resazurin'
      - 'alamar\s*blue'   # "alamarBlue" or "alamar blue"
      - 'cell\s+viability\s+assay'
      - 'colony\s+formation'
      - '\bBrdU\b'
      - '\bEdU\b'
      - '\bKi-?67\b'   # "Ki67" or "Ki-67"

  # Migration / invasion hub readout, matched on assay names.
  CELL_MIGRATION_INVASION:
    reports_state_of: cell migration / invasion / motility
    proximity: phenotypic
    convergence: high
    aligned_label_regex: 'cell migration|cell motility|cell locomotion|chemotaxis|cell.{0,12}invasion|wound healing|ameboidal|epithelial cell migration'
    commonly_overmapped_to:
      - GO:0016477   # cell migration
      - GO:0030334   # regulation of cell migration
      - GO:0030335   # positive regulation of cell migration
      - GO:0048870   # cell motility
      - GO:0006935   # chemotaxis
    patterns:
      - 'scratch\s*(assay|-?wound)'   # "scratch assay", "scratch wound", "scratch-wound"
      - 'wound[- ]?healing\s*assay'
      - 'boyden\s*chamber'
      - 'matrigel\s*invasion'
      - 'invasion\s*assay'
      - 'migration\s*assay'
      # "Transwell" names an insert format, not a readout: the same inserts are
      # used for barrier / TEER work (see BARRIER_PERMEABILITY). Require a
      # migration or invasion word within 40 characters, or "Transwell assay".
      - 'transwell\s*assay'
      - 'transwell.{0,40}(migrat|invas|invad)'
      - '(migrat|invas|invad).{0,40}\btranswell'

  # Adhesion / spreading hub readout. Every pattern requires the word "assay",
  # so a bare mention of adhesion or spreading does not match.
  CELL_ADHESION_SPREADING:
    reports_state_of: cell adhesion / spreading / attachment
    proximity: phenotypic
    convergence: high
    aligned_label_regex: 'cell adhesion|cell-substrate adhesion|cell-matrix adhesion|cell spreading|substrate adhesion|cell-cell adhesion'
    commonly_overmapped_to:
      - GO:0007155   # cell adhesion
      - GO:0007160   # cell-matrix adhesion
      - GO:0034446   # substrate adhesion-dependent cell spreading
      - GO:0098609   # cell-cell adhesion
    patterns:
      - 'cell\s*adhesion\s*assay'
      - 'cell\s*spreading\s*assay'
      - '\badhesion\s*assay'   # overlaps the "cell adhesion assay" pattern above
      - '\bspreading\s*assay'   # overlaps the "cell spreading assay" pattern above
      - 'cell[- ]?attachment\s*assay'

  # Endocytosis / trafficking hub readout, matched on uptake-tracer names and
  # explicit "... assay" phrases.
  MEMBRANE_TRAFFICKING_ENDOCYTOSIS:
    reports_state_of: endocytosis / membrane trafficking / internalization
    proximity: phenotypic
    convergence: high
    aligned_label_regex: 'endocytosis|endocytic|exocytosis|vesicle.{0,12}transport|membrane traffic|receptor.{0,15}internaliz|clathrin'
    commonly_overmapped_to:
      - GO:0006897   # endocytosis
      - GO:0006898   # receptor-mediated endocytosis
      - GO:0016192   # vesicle-mediated transport
      - GO:0006887   # exocytosis
    patterns:
      # "transferrin" alone does not match: it must be followed by an uptake /
      # internalization word, a 488/568/647 suffix, or "alexa".
      - 'transferrin\s*(uptake|internaliz|[- ]?(488|568|647)|-?alexa)'
      - 'FM\s*1-?43'   # "FM1-43", "FM 1-43", "FM143"
      - 'FM\s*4-?64'   # "FM4-64", "FM 4-64", "FM464"
      - 'dextran\s*uptake'
      - '(DiI|Dil)[- ]?(ac-?)?LDL'   # "DiI" or the "Dil" spelling; optional "ac" / "ac-" before LDL
      - 'labell?ed\s*LDL'   # "labeled" or "labelled"
      - 'endocytosis\s*assay'
      - 'internalization\s*assay'

  # Secretion / degranulation hub readout, matched on release-marker names and
  # explicit "... assay" phrases.
  SECRETION_DEGRANULATION:
    reports_state_of: secretion / degranulation / regulated release
    proximity: phenotypic
    convergence: high
    aligned_label_regex: 'secretion|secretory|degranulation|exocytosis|cytokine production|hormone secretion'
    commonly_overmapped_to:
      - GO:0046903   # secretion
      - GO:0032940   # secretion by cell
      - GO:0002576   # platelet degranulation
      - GO:0043299   # leukocyte degranulation
    patterns:
      - 'degranulation\s*assay'
      - '(β|beta)-?hexosaminidase'
      # NOTE(review): LDH release is presumably also used as a cell-lysis /
      # cytotoxicity measure -- confirm it belongs under secretion.
      - 'LDH\s*release'
      - 'lactate\s*dehydrogenase\s*release'
      - '\bCD107a\b'
      - 'insulin\s*secretion\s*assay'
      - '\bsecretion\s*assay'   # also matches "insulin secretion assay" when written with spaces
      # NB: bare "ELISA" is deliberately NOT a pattern -- it is a measurement
      # technique used for almost anything, not a secretion readout (analogous
      # to the "avoid protein binding" guidance). Require a secretion context.

  # Metabolic-flux hub readout (respiration, glycolysis, glucose uptake),
  # matched on instrument, rate and tracer names.
  METABOLIC_FLUX:
    reports_state_of: glucose uptake / respiration / glycolytic flux
    proximity: phenotypic
    convergence: high
    aligned_label_regex: 'glucose.{0,15}(import|transport|metabolic)|glycoly|cellular respiration|oxidative phosphorylation|aerobic respiration|ATP metabolic'
    commonly_overmapped_to:
      - GO:0006006   # glucose metabolic process
      - GO:0006096   # glycolytic process
      - GO:1904659   # glucose transmembrane transport
      - GO:0045333   # cellular respiration
      - GO:0006119   # oxidative phosphorylation
    patterns:
      - 'Seahorse'
      - 'oxygen\s*consumption\s*rate'
      # NB: bare "\bOCR\b" is deliberately omitted -- it collides with the
      # C. elegans ocr-* TRPV-channel gene family (ocr-2 etc.); require the
      # spelled-out "oxygen consumption rate" / Seahorse / ECAR instead.
      - 'extracellular\s*acidification\s*rate'
      - '\bECAR\b'   # abbreviation, bounded on both sides
      - '2-?NBDG'   # "2-NBDG" or "2NBDG"
      - '2-?deoxy[- ]?glucose\s*uptake'
      - '2-?DG\s*uptake'   # "2-DG" only counts with "uptake" after it
      - 'glucose\s*uptake\s*assay'
      - 'mito\s*stress\s*test'

  # DNA-damage hub readout, matched on H2AX-phosphorylation names, the comet
  # assay and repair-factor foci.
  DNA_DAMAGE_FOCI:
    reports_state_of: DNA damage / double-strand breaks
    proximity: phenotypic
    convergence: high
    aligned_label_regex: 'DNA damage|DNA repair|double-strand break|double strand break|response to DNA damage|DNA integrity'
    commonly_overmapped_to:
      - GO:0006974   # DNA damage response
      - GO:0006281   # DNA repair
      - GO:0006302   # double-strand break repair
      - GO:0042770   # signal transduction in response to DNA damage stimulus
    patterns:
      - '(γ|gamma)-?H2A\.?X'   # "γH2AX", "gamma-H2AX", "γ-H2A.X"; "\.?" is an optional literal dot
      - 'phospho-?H2AX'
      - 'p-?H2AX'   # "pH2AX" or "p-H2AX"
      - 'H2AX\s*foci'
      - 'comet\s*assay'
      - '53BP1\s*foci'
      - 'RAD51\s*foci'

  # Cellular-senescence hub readout, matched on the senescence-associated
  # beta-galactosidase stain and the SASP.
  SENESCENCE:
    reports_state_of: cellular senescence
    proximity: phenotypic
    convergence: high
    aligned_label_regex: 'senescence|senescent|cell aging|replicative.{0,12}aging'
    commonly_overmapped_to:
      - GO:0090398   # cellular senescence
      - GO:0007569   # cell aging
      - GO:0001302   # replicative cell aging
    patterns:
      - 'SA-?(β|b|beta)-?gal'   # "SA-β-gal", "SA-b-gal", "SA-beta-gal", hyphens optional
      - 'senescence-?associated\s*(β|beta)?-?galactosidase'
      - 'senescence-?associated\s*secretory'
      - '\bSASP\b'   # abbreviation, bounded on both sides

  # ---- Pathway-specific transcriptional reporters (downstream signaling output) ----
  # Like TRANSCRIPTIONAL_REPORTER these are phenotypic hubs, but each maps to a
  # specific signaling pathway's GO terms. A positive reporter licenses a BP
  # "pathway signaling" term, never MF (unless the gene is the pathway's TF).

  # Wnt / beta-catenin transcriptional reporter, matched on TOP/FOP-flash and
  # TCF/LEF reporter construct names.
  WNT_REPORTER:
    reports_state_of: canonical Wnt / beta-catenin transcriptional output
    proximity: phenotypic
    convergence: high
    aligned_label_regex: 'Wnt signaling|Wnt receptor signaling|canonical Wnt|beta-catenin|β-catenin'
    commonly_overmapped_to:
      - GO:0016055   # Wnt signaling pathway
      - GO:0060070   # canonical Wnt signaling pathway
      - GO:0090263   # positive regulation of canonical Wnt signaling pathway
    patterns:
      - 'TOP[- ]?FLASH'   # "TOPflash", "TOP-flash", "TOP flash"
      - 'FOP[- ]?FLASH'   # "FOPflash", "FOP-flash", "FOP flash"
      - 'TOP/?FOP'   # "TOP/FOP" or "TOPFOP"
      - 'TCF/?LEF\s*(reporter|luciferase|activity)'
      - 'TCF\s*reporter'

  # NF-kappaB transcriptional reporter. Both patterns require a reporter /
  # luciferase word, so a bare "NF-kB" mention does not match.
  NFKB_REPORTER:
    reports_state_of: NF-kappaB transcriptional output
    proximity: phenotypic
    convergence: high
    aligned_label_regex: 'NF-?kappaB|NF-?κB|NF-kappaB signaling|canonical NF-kappaB|I-?kappaB'
    commonly_overmapped_to:
      - GO:0007249   # canonical NF-kappaB signal transduction
      - GO:0038061   # non-canonical NF-kappaB signal transduction
      - GO:0051092   # positive regulation of NF-kappaB transcription factor activity
    patterns:
      - 'NF-?[κk]B\s*[- ]?(reporter|luciferase|luc\b)'   # "luc" must end its token (right boundary)
      - 'NF-?[κk]B[- ]?driven\s*(reporter|luciferase)'

  # Hypoxia / HIF readout, matched on HRE / HIF reporter names, HIF-1alpha
  # stabilization and hypoxia-probe names.
  HYPOXIA_HIF:
    reports_state_of: hypoxia / HIF transcriptional output
    proximity: phenotypic
    convergence: high
    aligned_label_regex: 'response to hypoxia|cellular response to hypoxia|hypoxia-inducible|response to decreased oxygen|response to oxygen levels'
    commonly_overmapped_to:
      - GO:0001666   # response to hypoxia
      - GO:0071456   # cellular response to hypoxia
      - GO:0036294   # cellular response to decreased oxygen levels
    patterns:
      - 'HRE[- ]?(reporter|luciferase|luc)'
      - 'hypoxia\s*response\s*element'
      - 'HIF[- ]?(reporter|luciferase|luc)'
      - 'HIF-?1[αa]\s*stabiliz'   # "HIF-1α" or "HIF1a"; stem covers stabilization / stabilized
      - 'pimonidazole'
      - '\bEF5\b'   # bounded on both sides

  # Notch transcriptional reporter. Every pattern pairs an element / factor name
  # with a reporter, promoter or luciferase word.
  NOTCH_REPORTER:
    reports_state_of: Notch transcriptional output
    proximity: phenotypic
    convergence: high
    aligned_label_regex: 'Notch signaling|Notch receptor signaling'
    commonly_overmapped_to:
      - GO:0007219   # Notch signaling pathway
      - GO:0045747   # positive regulation of Notch signaling pathway
    patterns:
      - 'RBP-?J[κk]?\s*(reporter|luciferase|-?luc)'   # "RBPJ", "RBP-J", "RBP-Jκ", "RBP-Jk"
      - 'CSL[- ]?(reporter|luciferase|luc)'
      - 'CBF-?1\s*(reporter|luciferase)'
      - 'TP-?1\s*(reporter|luciferase)'
      - 'HES1\s*(reporter|promoter|luciferase)'

  # Hippo / YAP-TAZ-TEAD transcriptional reporter, matched on construct names.
  HIPPO_TEAD_REPORTER:
    reports_state_of: Hippo / YAP-TAZ-TEAD transcriptional output
    proximity: phenotypic
    convergence: high
    aligned_label_regex: 'hippo signaling|regulation of hippo'
    commonly_overmapped_to:
      - GO:0035329   # hippo signaling
      - GO:0035330   # regulation of hippo signaling
    patterns:
      # The copy-number prefix ("8x") is fully optional, so bare "GTIIC" matches
      # too; the prefix only widens the matched span.
      - '\d*x?GTIIC'
      - 'TEAD\s*[- ]?(reporter|luciferase|-?luc)'
      - 'YAP/?TAZ\s*reporter'

  # Differentiation hub readout, matched on stain names and lineage /
  # differentiation-assay phrases.
  CELL_DIFFERENTIATION:
    reports_state_of: cell differentiation / lineage commitment
    proximity: phenotypic
    convergence: high
    aligned_label_regex: 'cell differentiation|osteoblast differentiation|fat cell differentiation|chondrocyte differentiation|myoblast differentiation|neuron differentiation|cell fate commitment|stem cell differentiation'
    commonly_overmapped_to:
      - GO:0030154   # cell differentiation
      - GO:0001649   # osteoblast differentiation
      - GO:0045444   # fat cell differentiation
      - GO:0045165   # cell fate commitment
    patterns:
      # These patterns use "\s+" (at least one whitespace character) between
      # words, so run-together spellings do not match.
      - 'alizarin\s+red'
      - 'oil\s+red\s+o'
      - 'alcian\s+blue'
      - 'von\s+kossa'
      - 'differentiation\s+assay'
      - '(osteogenic|adipogenic|chondrogenic|myogenic|neurogenic)\s+differentiation'
      - 'lineage\s+(marker|commitment)'

  # Angiogenesis hub readout, matched on tube-formation, aortic-ring, CAM,
  # Matrigel-plug and sprouting assay names.
  ANGIOGENESIS_TUBE:
    reports_state_of: angiogenesis / endothelial tube formation
    proximity: phenotypic
    convergence: high
    aligned_label_regex: 'angiogenesis|blood vessel|vasculature development|vasculogenesis|sprouting'
    commonly_overmapped_to:
      - GO:0001525   # angiogenesis
      - GO:0045766   # positive regulation of angiogenesis
      - GO:0001570   # vasculogenesis
    patterns:
      - 'tube\s+formation\s+assay'
      - 'tubule\s+formation'
      - 'capillary[- ]?like\s+(structure|tube|network)'
      - 'aortic\s+ring\s+assay'
      - 'chorioallantoic\s+membrane'
      - '\bCAM\s+assay'   # the abbreviation only counts when "assay" follows
      - 'matrigel\s+plug'
      - 'sprouting\s+assay'

  # Phagocytosis / engulfment hub readout, matched on assay phrases and
  # particle-uptake wording.
  PHAGOCYTOSIS:
    reports_state_of: phagocytosis / engulfment / clearance
    proximity: phenotypic
    convergence: high
    aligned_label_regex: 'phagocytosis|engulfment|efferocytosis|apoptotic cell clearance'
    commonly_overmapped_to:
      - GO:0006909   # phagocytosis
      - GO:0006911   # phagocytosis, engulfment
      - GO:0043277   # apoptotic cell clearance
    patterns:
      - 'phagocytosis\s+assay'
      - 'phagocytic\s+(assay|index|uptake|activity|capacity)'
      - 'engulfment\s+(assay|of)'   # "engulfment of <target>" counts as well as "engulfment assay"
      # NOTE(review): bare process word that also appears in aligned_label_regex
      # -- confirm this is acceptable under the header guidance on labels.
      - 'efferocytosis'
      - '(zymosan|bioparticle|latex\s+bead)\s+(uptake|engulf|phagocyt)'
      - 'apoptotic\s+cell\s+(clearance|engulf|uptake)'

  # Cell-cycle-distribution hub readout, matched on analysis phrases, the
  # propidium iodide stain and phase-arrest wording.
  CELL_CYCLE_FLOW:
    reports_state_of: cell cycle phase distribution
    proximity: phenotypic
    convergence: high
    aligned_label_regex: 'cell cycle|mitotic cell cycle|G1/S transition|G2/M transition|cell cycle arrest|DNA replication'
    commonly_overmapped_to:
      - GO:0007049   # cell cycle
      - GO:0051726   # regulation of cell cycle
      - GO:0000082   # G1/S transition of mitotic cell cycle
      - GO:0000086   # G2/M transition of mitotic cell cycle
    patterns:
      - 'cell\s+cycle\s+(analysis|distribution|profil|arrest\s+assay)'   # bare "cell cycle" / "cell cycle arrest" does not match
      - 'propidium\s+iodide'
      - '\bsub-?G1\b'   # "sub-G1" or "subG1"
      - 'G0/G1\s+(arrest|phase|accumulat)'
      - 'G2/M\s+(arrest|phase|accumulat)'

  # Barrier-integrity hub readout, matched on electrical-resistance and
  # tracer-permeability assay names.
  BARRIER_PERMEABILITY:
    reports_state_of: epithelial / endothelial barrier integrity
    proximity: phenotypic
    convergence: high
    aligned_label_regex: 'cell junction|tight junction|cell-cell junction|bicellular tight junction|establishment.{0,15}barrier|paracellular'
    commonly_overmapped_to:
      - GO:0070160   # tight junction
      - GO:0045216   # cell-cell junction organization
      - GO:0034329   # cell junction assembly
    patterns:
      - '\bTEER\b'   # abbreviation, bounded on both sides
      - 'transepithelial\s+(electrical\s+)?resistance'   # "electrical" is optional
      - 'transendothelial\s+(electrical\s+)?resistance'   # "electrical" is optional
      - 'paracellular\s+(permeability|flux|transport)'
      - 'FITC-?dextran\s+(permeability|flux|leakage)'
      - 'barrier\s+(function|integrity|permeability)\s+assay'

  # Protein-turnover hub readout, matched on chase-experiment names and
  # half-life / stability phrases.
  PROTEIN_TURNOVER:
    reports_state_of: protein stability / turnover / half-life
    proximity: phenotypic
    convergence: high
    aligned_label_regex: 'protein catabolic|protein stabiliz|protein destabiliz|regulation of protein stability|protein turnover|ubiquitin-dependent protein catabolic|protein deubiquitination'
    commonly_overmapped_to:
      - GO:0030163   # protein catabolic process
      - GO:0050821   # protein stabilization
      - GO:0031647   # regulation of protein stability
      - GO:0043161   # proteasome-mediated ubiquitin-dependent protein catabolic process
    patterns:
      - 'cycloheximide\s+chase'
      - '\bCHX\s+chase'
      - 'pulse-?chase'   # "pulse-chase" or "pulsechase" (a space is not accepted)
      - 'protein\s+half-?life'
      - 'protein\s+(stability|turnover)\s+(assay|experiment)'
      # NOTE(review): not anchored to "protein", so any "degradation rate"
      # matches (e.g. of an mRNA) -- confirm intended.
      - 'degradation\s+rate'

  # Redox-couple hub readout, matched on ratio notation for the glutathione and
  # NAD(P) couples in either order.
  REDOX_BALANCE:
    reports_state_of: cellular redox couple ratio (GSH/GSSG, NAD(P)+/NAD(P)H)
    proximity: phenotypic
    convergence: high
    aligned_label_regex: 'cell redox homeostasis|glutathione metabolic|oxidation-reduction process|NAD metabolic|NADP metabolic|glutathione biosynthetic'
    commonly_overmapped_to:
      - GO:0045454   # cell redox homeostasis
      - GO:0006749   # glutathione metabolic process
      - GO:0006739   # NADP metabolic process
      - GO:0006734   # NADH metabolic process
    patterns:
      - 'GSH/GSSG'
      - 'GSSG/GSH'
      - '\bGSSG\b'   # bare GSSG counts; bare GSH does not
      - 'glutathione\s+(ratio|redox|disulfide)'
      - 'reduced\s+(and\s+)?oxidized\s+glutathione'
      - 'NAD\+?/NADH'   # "\+?" is an optional literal plus: "NAD+/NADH" or "NAD/NADH"
      - 'NADH/NAD'
      - 'NADP\+?/NADPH'   # "NADP+/NADPH" or "NADP/NADPH"
      - 'NADPH/NADP'

  # Lipid-peroxidation hub readout, matched on peroxidation-product and
  # lipid-ROS probe names.
  LIPID_PEROXIDATION:
    reports_state_of: lipid peroxidation (oxidative lipid damage / ferroptosis)
    proximity: phenotypic
    convergence: high
    aligned_label_regex: 'lipid oxidation|response to oxidative stress|ferroptosis|cellular response to oxidative stress|lipid peroxidation'
    commonly_overmapped_to:
      - GO:0006979   # response to oxidative stress
      - GO:0034599   # cellular response to oxidative stress
      - GO:0097707   # ferroptosis
    patterns:
      - 'malondialdehyde'
      - '\bTBARS\b'
      - '4-?HNE\b'
      - '4-?hydroxynonenal'
      # Separator may be a hyphen, a space or nothing, as elsewhere in this
      # file: the probe is usually written "BODIPY 581/591 C11", which a
      # hyphen-only separator misses.
      - 'BODIPY[- ]?(C11|581|665)'
      - 'C11[- ]?BODIPY'
      - 'lipid\s+peroxidation'
      - 'lipid\s+ROS'

  # Translation-rate hub readout, matched on nascent-chain labelling and
  # polysome-profiling method names.
  TRANSLATION_ASSAY:
    reports_state_of: protein synthesis / translation rate
    proximity: phenotypic
    convergence: high
    aligned_label_regex: 'translation|protein synthesis|translational|peptide biosynthetic|cytoplasmic translation|regulation of translation'
    commonly_overmapped_to:
      - GO:0006412   # translation
      - GO:0006417   # regulation of translation
      - GO:0043043   # peptide biosynthetic process
    patterns:
      # Case-sensitive (same device as the HyPer pattern): under the
      # case-insensitive default the method name would match the ordinary word
      # "sunset". The trailing "SET" must be upper-case; "SUnSET", "SUNSET" and
      # "SunSET" are accepted.
      - '(?-i:S[Uu][nN]SET)'
      - 'puromycin\s+(labeling|labelling|incorporation)'
      - 'puromycin-?label'
      - 'polysome\s+(profil|fractionation|analysis)'
      - 'polyribosome\s+profil'
      - '35S-?methionine\s+(incorporation|labell?ing|pulse)'   # US/UK spelling, as for puromycin
      - '\[35S\]\s*-?(methionine|met\b)'   # escaped brackets: literal "[35S]"
      - 'nascent\s+(protein|peptide)\s+(synthesis|labell?ing)'   # US/UK spelling
      - 'de\s+novo\s+protein\s+synthesis'

  # cAMP / cGMP second-messenger hub readout. Every pattern requires an explicit
  # assay / level / sensor context around the nucleotide name.
  CYCLIC_NUCLEOTIDE_SIGNALING:
    reports_state_of: cAMP / cGMP second-messenger level
    proximity: phenotypic
    convergence: high
    aligned_label_regex: 'cAMP-mediated signaling|cGMP-mediated signaling|cyclic nucleotide|adenylate cyclase-modulating|adenylate cyclase-activating|G protein-coupled receptor signaling pathway'
    commonly_overmapped_to:
      - GO:0019933   # cAMP-mediated signaling
      - GO:0019934   # cGMP-mediated signaling
      - GO:0007188   # adenylate cyclase-modulating GPCR signaling pathway
      - GO:0007189   # adenylate cyclase-activating GPCR signaling pathway
    patterns:
      # NB: bare "cAMP" is avoided. Under IGNORECASE, \bcAMP\b would hit the
      # ordinary word "camp", and an unanchored cAMP would match inside
      # "hippocampus" / "campaign". Require an explicit assay/level/sensor
      # context instead.
      - 'cyclic\s+(AMP|adenosine\s+monophosphate|GMP|guanosine\s+monophosphate)'
      # "response" is rejected when followed by "element": "cAMP response
      # element(-binding protein)" is a DNA-element / protein name, not a
      # measurement of cAMP.
      - 'c[AG]MP\s+(assay|accumulation|level|production|response(?!\s+element)|elevation|formation)'
      - 'c[AG]MP\s+(ELISA|biosensor)'
      - 'intracellular\s+c[AG]MP'
      - '\bEpac[- ]?(sensor|FRET|based)'

  # Target-cell-killing hub readout, matched on label-release and lysis /
  # killing assay names.
  CYTOTOXICITY_KILLING:
    reports_state_of: target-cell killing / cytotoxicity (immune)
    proximity: phenotypic
    convergence: high
    aligned_label_regex: 'cell killing|cytolysis|cytotoxicity|natural killer cell mediated|T cell mediated cytotox|leukocyte mediated cytotox|killing of cells'
    commonly_overmapped_to:
      - GO:0001906   # cell killing
      - GO:0031640   # killing of cells of another organism
      - GO:0042267   # natural killer cell mediated cytotoxicity
      - GO:0001913   # T cell mediated cytotoxicity
    patterns:
      - '(51Cr|chromium)[- ]?release'
      - '\[51Cr\]'   # escaped brackets: literal "[51Cr]"
      - 'calcein[- ]?(AM\s+)?release'   # "calcein release", "calcein-AM release", "calcein AM release"
      - 'cytotoxicity\s+assay'
      - 'killing\s+assay'
      - 'cytolytic\s+(assay|activity)'
      - 'specific\s+lysis'
      - 'target\s+cell\s+(lysis|killing)'

  # Cytokine-production hub readout. ELISA only counts with an explicit
  # "cytokine" prefix; bare "ELISA" is not a pattern here.
  CYTOKINE_PRODUCTION:
    reports_state_of: cytokine production / secretion (immune)
    proximity: phenotypic
    convergence: high
    aligned_label_regex: 'cytokine production|cytokine secretion|interleukin.{0,10}production|interferon.{0,12}production|tumor necrosis factor production|cytokine-mediated|inflammatory response'
    commonly_overmapped_to:
      - GO:0001816   # cytokine production
      - GO:0032609   # interferon-gamma production
      - GO:0032635   # interleukin-6 production
      - GO:0006954   # inflammatory response
    patterns:
      - 'ELISpot'
      - 'intracellular\s+cytokine\s+stain'   # stem covers "stain" / "staining"
      - 'cytokine\s+(ELISA|bead\s+array|multiplex|secretion\s+assay)'
      - 'cytokine\s+production\s+assay'

  # Histone-mark hub readout, matched on residue-level mark notation
  # (H3K<n>me1-3 / ac) and spelled-out methylation / acetylation of H3 lysines.
  HISTONE_MARK:
    reports_state_of: histone modification state (chromatin marks)
    proximity: phenotypic
    convergence: high
    aligned_label_regex: 'histone modification|histone methylation|histone acetylation|histone lysine|chromatin organization|chromatin remodeling|covalent chromatin modification|chromatin modification'
    commonly_overmapped_to:
      - GO:0016570   # histone modification
      - GO:0006325   # chromatin organization
      - GO:0016571   # histone methylation
      - GO:0016573   # histone acetylation
    patterns:
      - 'H3K\d+(me[1-3]|ac)'   # residue number, then me1/me2/me3 or ac (e.g. "H3K4me3", "H3K27ac")
      - 'H4K\d+(me[1-3]|ac)'   # same notation for H4
      - 'H3K\d+\s+(methylation|acetylation|trimethylation|dimethylation)'
      - '(trimethylation|dimethylation)\s+of\s+H3'
      - 'histone\s+H3\s+(lysine|K\d)'

  # EMT hub readout, matched on the transition phrase and marker-switch wording.
  EMT_MARKERS:
    reports_state_of: epithelial-mesenchymal transition (marker switch)
    proximity: phenotypic
    convergence: high
    aligned_label_regex: 'epithelial to mesenchymal transition|epithelial-mesenchymal|mesenchymal cell differentiation|mesenchymal stem cell'
    commonly_overmapped_to:
      - GO:0001837   # epithelial to mesenchymal transition
      - GO:0010718   # positive regulation of epithelial to mesenchymal transition
    patterns:
      # NOTE(review): this pattern also matches the GO:0001837 label text
      # ("epithelial to mesenchymal transition") itself -- confirm acceptable
      # given the header guidance about labels matching themselves.
      - 'epithelial[- ]?(to\s+)?mesenchymal\s+transition'
      - '\bEMT\b\s+(marker|phenotype|process|program|induction|signature)'   # bare "EMT" is not enough; a context word must follow
      - 'mesenchymal\s+(marker|phenotype)'
      - 'cadherin\s+switch'

  # Stemness / self-renewal hub readout, matched on sphere-formation names and
  # self-renewal / stemness assay phrases.
  STEMNESS_SPHERE:
    reports_state_of: stemness / self-renewal (sphere formation)
    proximity: phenotypic
    convergence: high
    aligned_label_regex: 'stem cell population maintenance|stem cell division|stem cell differentiation|stem cell proliferation|maintenance of.{0,15}identity'
    commonly_overmapped_to:
      - GO:0019827   # stem cell population maintenance
      - GO:0017145   # stem cell division
      - GO:0048863   # stem cell differentiation
    patterns:
      - 'sphere\s+formation\s+assay'
      - 'tumou?rsphere'   # "tumorsphere" or "tumoursphere"
      - 'mammosphere'
      - 'neurosphere'
      - 'self-?renewal\s+(assay|capacity)'
      - 'stemness\s+(marker|assay)'
      - 'side\s+population\s+(assay|cells)'

  # Protein-aggregation hub readout, matched on amyloid-dye names and
  # aggregation / inclusion phrases.
  PROTEIN_AGGREGATION:
    reports_state_of: protein aggregation / amyloid formation
    proximity: phenotypic
    convergence: high
    aligned_label_regex: 'protein folding|amyloid|response to topologically incorrect protein|response to.{0,15}misfolded|amyloid fibril formation|inclusion body|chaperone-mediated'
    commonly_overmapped_to:
      - GO:0006457   # protein folding
      - GO:0035966   # response to topologically incorrect protein
      - GO:0034620   # cellular response to misfolded protein
      - GO:1990000   # amyloid fibril formation
    patterns:
      - 'thioflavin[- ]?(T|S)'
      - '\bThT\s+(assay|fluoresc|binding)'   # the abbreviation only counts with a context word after it
      - 'amyloid\s+(fibril|aggregation|formation|assay)'
      - 'protein\s+aggregation\s+assay'
      - 'aggresome'
      - 'inclusion\s+body\s+formation'
      - 'Congo\s+red'
      - 'aggregation\s+(kinetics|propensity)'

  # Inflammasome / pyroptosis hub readout, matched on inflammasome-component,
  # caspase-1, IL-1beta-processing and gasdermin wording.
  INFLAMMASOME_PYROPTOSIS:
    reports_state_of: inflammasome activation / pyroptosis
    proximity: phenotypic
    convergence: high
    aligned_label_regex: 'inflammasome|interleukin-1 beta production|pyroptosis|positive regulation of interleukin-1|inflammatory response|innate immune response'
    commonly_overmapped_to:
      - GO:0032611   # interleukin-1 beta production
      - GO:0070269   # pyroptosis
      - GO:0050729   # positive regulation of inflammatory response
      - GO:0002221   # pattern recognition receptor signaling pathway
    patterns:
      - 'inflammasome\s+(activation|assembly|complex)'
      - 'NLRP3\s+(inflammasome|activation)'
      - 'ASC\s+speck'
      - 'caspase-?1\s+(activation|cleavage|activity)'
      - 'IL-?1(β|beta)\s+(maturation|secretion|release|processing)'
      # NOTE(review): bare process word identical to the GO:0070269 label listed
      # above -- confirm acceptable given the header guidance about labels
      # matching themselves.
      - 'pyroptosis'
      - 'GSDMD|gasdermin'

  # ---- Molecular, low-convergence readouts (high-reliability controls) ----

  # Molecular / low-convergence control: in-vitro measurement of the gene
  # product's own catalytic activity. No aligned_label_regex or
  # commonly_overmapped_to keys are defined for this entry.
  IN_VITRO_ENZYME:
    reports_state_of: gene product's own catalytic activity
    proximity: molecular
    convergence: low
    patterns:
      - 'in\s+vitro\s+(enzym|activ|assay|reconstitut)'
      - 'purified\s+(recombinant\s+)?protein'
      - 'specific\s+activity'
      - '\bkcat\b'
      - '\bKm\b'
      - 'enzymatic\s+assay'
      # "Reconstitution" counts only in a biochemical context. The bare stem
      # also matches in-vivo / rescue usage ("bone marrow reconstitution",
      # "knockout cells reconstituted with wild-type cDNA"), which is
      # phenotypic evidence and must not be scored as a molecular readout.
      # "in vitro reconstitut..." is covered by the first pattern.
      - '(biochemical(ly)?|cell-?free)\s+reconstitut'
      - 'reconstitut\w*\s+(in\s+vitro|(from|with|using)\s+(purified|recombinant)|in(to)?\s+(proteo)?liposomes)'

  # Molecular / low-convergence control: biophysical or structural evidence of
  # a direct interaction. No aligned_label_regex or commonly_overmapped_to keys
  # are defined for this entry.
  DIRECT_BINDING:
    reports_state_of: direct physical interaction with a specific ligand
    proximity: molecular
    convergence: low
    patterns:
      - 'isothermal\s+titration'
      - '\bITC\b'   # abbreviation, bounded on both sides
      - 'surface\s+plasmon'
      - '\bSPR\b'   # abbreviation, bounded on both sides
      - 'co-?crystal'   # "co-crystal" or "cocrystal"; stem covers "co-crystallized" etc.
      - 'crystal\s+structure'
      - 'cryo-?EM'   # "cryo-EM" or "cryoEM"

  # Rubidium (86Rb+/Rb+) flux is the classic functional readout for K+ channel /
  # transporter activity (Rb+ as a K+ congener tracer). Unlike the phenotypic Ca2+
  # imaging hub, it is a near-DIRECT measure of the gene product's own transport
  # activity, so it sits on the molecular / low-convergence end and LICENSES an MF
  # channel-activity term -- a positive control for the proximity axis. (Caveat:
  # it is direct only for the pore-forming channel; flux moved by perturbing an
  # upstream regulator/subunit is the same indirect inference as the hubs.)
  RUBIDIUM_FLUX:
    # Molecular / low-convergence entry; the patterns cover the element name and
    # the common ways of writing the 86Rb isotope.
    reports_state_of: potassium channel / K+ transporter activity (Rb+ flux)
    proximity: molecular
    convergence: low
    aligned_label_regex: 'potassium ion (transmembrane )?transport|potassium channel|potassium ion export|potassium ion import|voltage-gated potassium|inward rectif|outward rectif|K\+ channel'
    commonly_overmapped_to:
      - GO:0005267   # potassium channel activity
      - GO:0005249   # voltage-gated potassium channel activity
      - GO:0071805   # potassium ion transmembrane transport
      - GO:0006813   # potassium ion transport
    patterns:
      - 'rubidium'
      - '86\s*Rb\b'   # "86Rb" or "86 Rb"; right boundary so "Rb" must end its token
      - '\(86\)\s*Rb'   # escaped parentheses: literal "(86)Rb"
      - '\bRb[\s-]?86\b'   # "Rb86", "Rb-86", "Rb 86"

  # Electrophysiology is the gold-standard DIRECT readout of ion-channel/transporter
  # activity (molecular, low-convergence) -- licenses an MF channel-activity term.
  # The same upstream-regulator caveat as RUBIDIUM_FLUX applies.
  ELECTROPHYSIOLOGY:
    # Molecular / low-convergence entry; the patterns are recording-technique
    # names plus the generic "electrophysiolog" stem.
    reports_state_of: ion channel / transporter activity (direct electrophysiology)
    proximity: molecular
    convergence: low
    aligned_label_regex: 'channel activity|ion transmembrane transport|voltage-gated|ligand-gated|gated channel|ion channel|transmembrane transporter activity|cation transport|anion transport|ion transport'
    commonly_overmapped_to:
      - GO:0005216   # ion channel activity
      - GO:0022857   # transmembrane transporter activity
      - GO:0034220   # ion transmembrane transport
      - GO:0005244   # voltage-gated ion channel activity
    patterns:
      - 'patch[- ]?clamp'
      - 'voltage[- ]?clamp'
      - 'whole[- ]?cell\s+(current|recording|patch|configuration)'
      - 'single[- ]?channel\s+(record|conduct|current|open)'   # stems: recording, conductance, opening, ...
      - 'two[- ]?electrode\s+voltage'
      - '\bTEVC\b'   # abbreviation, bounded on both sides
      - 'current[- ]?voltage\s+relation'   # stem covers "relation" / "relationship"
      - 'electrophysiolog'   # stem: electrophysiology / electrophysiological

  # Direct (molecular, low-convergence) readout of protein kinase catalytic
  # activity measured in vitro.
  KINASE_ACTIVITY_ASSAY:
    reports_state_of: protein kinase catalytic activity (in vitro)
    proximity: molecular
    convergence: low
    # Kinase-activity label fragments; presumably matched against GO term
    # labels -- confirm against the consumer.
    aligned_label_regex: 'kinase activity|protein kinase|phosphotransferase|protein serine/threonine kinase|protein tyrosine kinase'
    commonly_overmapped_to:
      - GO:0004672   # protein kinase activity
      - GO:0016301   # kinase activity
      - GO:0004674   # protein serine/threonine kinase activity
      - GO:0004713   # protein tyrosine kinase activity
    patterns:
      # "in vitro kinase assay"; `\s*` also accepts "invitro" but a hyphenated
      # "in-vitro" is not matched. Every hit of this pattern is also a hit of the
      # broader 'kinase\s+assay' pattern that follows.
      - 'in\s*vitro\s+kinase\s+assay'
      - 'kinase\s+assay'
      # Radiolabel notations: "γ-32P" / "gamma32P", a bracketed "[32P]", and
      # "32P-ATP" / "32P-γ" / "32Pgamma".
      - '(γ|gamma)-?32P'
      # NOTE(review): a bare "[32P]" matches whatever compound carries the label
      # (not only ATP) -- confirm this breadth is intended for a kinase readout.
      - '\[32P\]'
      - '32P-?(ATP|γ|gamma)'
      # Matches "ADP-Glo" and "ADPGlo".
      - 'ADP-?Glo'
      # Stem: covers "autophosphorylation", "autophosphorylated", ...
      - 'autophosphorylat'
      - 'radiometric\s+kinase'

  # Direct (molecular, low-convergence) readout of GTPase, GAP, or GEF
  # activity.
  GTPASE_ACTIVITY:
    reports_state_of: GTPase / GAP / GEF activity
    proximity: molecular
    convergence: low
    # GTPase / GTP-binding / exchange-factor label fragments; presumably matched
    # against GO term labels -- confirm against the consumer.
    aligned_label_regex: 'GTPase activity|GTP binding|GTPase activator|guanyl-nucleotide exchange|GDP/GTP exchange'
    commonly_overmapped_to:
      - GO:0003924   # GTPase activity
      - GO:0005096   # GTPase activator activity
      - GO:0005085   # guanyl-nucleotide exchange factor activity
      - GO:0005525   # GTP binding
    patterns:
      # "GTP hydrolysis" with or without a space.
      - 'GTP\s*hydrolysis'
      - 'GTPase\s+assay'
      # NOTE(review): this pattern and the "nucleotide exchange factor"
      # alternative below also hit protein-class names ("GTPase-activating
      # protein", "nucleotide exchange factor"), whereas the file header prefers
      # probe/instrument names over generic words -- confirm this is intended.
      - 'GTPase[- ]?activating'
      - 'nucleotide\s+exchange\s+(assay|factor|activity)'
      # "GST-RBD" / "GST-PBD" (hyphen optional).
      - 'GST-?(RBD|PBD)'
      # "GTPγS" or "GTPgammaS".
      - 'GTP(γ|gamma)S'

  # Direct (molecular, low-convergence) readout of ubiquitin / ubiquitin-like
  # ligase and conjugation activity measured in vitro. Deubiquitinase assays
  # report a different activity and are kept out of the patterns below.
  UBIQUITINATION_ASSAY:
    reports_state_of: ubiquitin (-like) ligase / conjugation activity (in vitro)
    proximity: molecular
    convergence: low
    # Ligase / transferase / conjugation label fragments; presumably matched
    # against GO term labels -- confirm against the consumer.
    aligned_label_regex: 'ubiquitin.{0,20}ligase|ubiquitin protein transferase|ubiquitin-protein transferase|ubiquitin conjugat|ubiquitin-like protein (ligase|transferase)|SUMO (ligase|transferase)'
    commonly_overmapped_to:
      - GO:0061630   # ubiquitin protein ligase activity
      - GO:0004842   # ubiquitin-protein transferase activity
      - GO:0019787   # ubiquitin-like protein transferase activity
    patterns:
      # Anything starting "in vitro ubiquitin..." (ubiquitination, ubiquitylation,
      # ubiquitin ligase assay, ...).
      - 'in\s*vitro\s+ubiquitin'
      # The leading group rejects "deubiquitination assay" and
      # "de-ubiquitylation assay" (preceding "e" or "e-") while still accepting
      # prefixed forms such as "polyubiquitination assay" or
      # "mono-ubiquitination assay". It is written without lookbehind so the
      # pattern stays portable across regex engines.
      - '(^|[^e-]|[^e]-)ubiquitin(ation|ylation)\s+assay'
      # "autoubiquitin..." / "auto-ubiquitin...".
      - 'auto-?ubiquitin'
      # Accepts "E3 ligase assay/activity", "E3 ubiquitin assay/activity" and the
      # full phrase "E3 ubiquitin ligase assay/activity".
      - 'E3\s+(ligase|ubiquitin(\s+ligase)?)\s+(assay|activity)'
      - 'ubiquitin\s+ligase\s+assay'
      - 'thioester\s+(assay|formation|intermediate)'

  # ChIP / EMSA give DIRECT evidence of sequence-specific DNA binding -- the
  # molecular counterpart to the phenotypic TRANSCRIPTIONAL_REPORTER. Licenses an
  # MF DNA-binding-TF activity term for a genuine TF.
  CHROMATIN_CHIP:
    reports_state_of: direct DNA / chromatin binding (ChIP / EMSA)
    proximity: molecular
    convergence: low
    # DNA-binding / TF-activity label fragments; presumably matched against GO
    # term labels -- confirm against the consumer.
    aligned_label_regex: 'DNA-binding transcription factor activity|sequence-specific DNA binding|cis-regulatory region|transcription cis-regulatory|chromatin binding|DNA-binding transcription'
    commonly_overmapped_to:
      - GO:0003700   # DNA-binding transcription factor activity
      - GO:0000981   # DNA-binding transcription factor activity, RNA polymerase II-specific
      - GO:0043565   # sequence-specific DNA binding
      - GO:0000976   # transcription cis-regulatory region binding
    patterns:
      - 'chromatin\s+immunoprecipitation'
      # Hyphenated ChIP variants; deliberately no left boundary so prefixed
      # forms still match.
      - 'ChIP-?(seq|qPCR|PCR|chip)\b'
      # Matching is case-insensitive, so "ChIP" is just "chip" here. The left
      # boundary stops hits inside longer words ("GeneChip analysis",
      # "microchip assay", "biochip assay"). A free-standing "chip analysis"
      # cannot be told apart from "ChIP analysis" and still matches.
      - '\bChIP\s+(assay|analysis|experiment|peak)'
      - 'electrophoretic\s+mobility\s+shift'
      - '\bEMSA\b'
      - 'gel\s+shift\s+assay'
      # "DNA pulldown" / "DNA pull-down".
      - 'DNA\s+pull-?down'

  # Direct (molecular, low-convergence) readout of peptidase / protease
  # catalytic activity measured in vitro.
  PROTEASE_ACTIVITY:
    reports_state_of: peptidase / protease catalytic activity (in vitro)
    proximity: molecular
    convergence: low
    # Peptidase-class label fragments plus the bare process word "proteolysis";
    # presumably matched against GO term labels -- confirm against the consumer.
    aligned_label_regex: 'peptidase activity|endopeptidase|exopeptidase|metallopeptidase|serine-type peptidase|cysteine-type peptidase|aspartic-type peptidase|aminopeptidase|carboxypeptidase|proteolysis'
    commonly_overmapped_to:
      - GO:0008233   # peptidase activity
      - GO:0004252   # serine-type endopeptidase activity
      - GO:0004222   # metalloendopeptidase activity
      - GO:0006508   # proteolysis
    patterns:
      # Explicit assay names.
      - 'protease\s+assay'
      - 'peptidase\s+assay'
      # NOTE(review): "proteolytic cleavage" and "proteolytic activity" are
      # process phrases rather than assay names, so this pattern is broader
      # than the two above -- confirm that is intended.
      - 'proteolytic\s+(assay|cleavage|activity)'
      # "fluorogenic substrate" or "fluorogenic peptide substrate".
      - 'fluorogenic\s+(peptide\s+)?substrate'
      - 'zymography'

  # Direct (molecular, low-convergence) readout of nuclease catalytic activity
  # measured in vitro.
  NUCLEASE_ACTIVITY:
    reports_state_of: nuclease catalytic activity (in vitro)
    proximity: molecular
    convergence: low
    # Nuclease-class label fragments; presumably matched against GO term
    # labels -- confirm against the consumer.
    aligned_label_regex: 'nuclease activity|endonuclease|exonuclease|ribonuclease|deoxyribonuclease|RNA endonuclease|DNA endonuclease'
    commonly_overmapped_to:
      - GO:0004518   # nuclease activity
      - GO:0004519   # endonuclease activity
      - GO:0004527   # exonuclease activity
      - GO:0004540   # RNA nuclease activity
    patterns:
      # No left boundary, so this also hits "endonuclease assay",
      # "exonuclease assay" and "ribonuclease assay".
      - 'nuclease\s+assay'
      - 'in\s*vitro\s+nuclease'
      # These two add the "... activity" wording, but only for the endo-, exo-
      # and ribo- forms; a bare "nuclease activity" is not matched by any
      # pattern in this entry.
      - '(endo|exo)nuclease\s+(assay|activity)'
      - 'DNA\s+cleavage\s+assay'
      - 'RNA\s+cleavage\s+assay'
      - 'ribonuclease\s+(assay|activity)'

  # Direct (molecular, low-convergence) readout of lipid transporter / transfer
  # / flippase activity.
  LIPID_TRANSFER_FLIPPASE:
    reports_state_of: lipid transporter / transfer / flippase activity
    proximity: molecular
    convergence: low
    # Lipid transport / transfer label fragments; presumably matched against GO
    # term labels -- confirm against the consumer.
    aligned_label_regex: 'lipid transport|lipid transfer|phospholipid transport|phospholipid translocat|flippase|scramblase|sterol transport|cholesterol transport|lipid transporter'
    commonly_overmapped_to:
      - GO:0005319   # lipid transporter activity
      - GO:0140303   # intramembrane lipid transporter activity
      - GO:0120013   # intermembrane lipid transfer activity
      - GO:0006869   # lipid transport
    patterns:
      - 'flippase'
      - 'floppase'
      - 'scramblase'
      # Stems: "phospholipid translocation", "... flipping", "... scrambling".
      - 'phospholipid\s+(translocat|flip|scrambl)'
      # NBD-tagged lipid probes. The two-letter head-group abbreviations need a
      # right boundary: matching is case-insensitive, so without it "NBD-PE"
      # would also fire inside "NBD-peptide" or "NBD-PEG". The longer
      # "lipid" / "phospho" alternatives remain stems ("NBD-phospholipid").
      - 'NBD-?((PC|PS|PE|PG)\b|lipid|phospho)'
      - 'lipid\s+transfer\s+(assay|activity)'
      - '(cholesterol|sterol)\s+transfer\s+(assay|activity)'
      - 'lipid\s+transport\s+assay'

  # Direct (molecular, low-convergence) readout of proteasome catalytic
  # (chymotrypsin-like) activity.
  PROTEASOME_ACTIVITY:
    reports_state_of: proteasome catalytic (chymotrypsin-like) activity
    proximity: molecular
    convergence: low
    # Proteasome / threonine-type peptidase label fragments; presumably matched
    # against GO term labels -- confirm against the consumer.
    aligned_label_regex: 'threonine-type endopeptidase|threonine-type peptidase|proteasome|proteasomal|proteasome-mediated|ubiquitin-dependent protein catabolic'
    commonly_overmapped_to:
      - GO:0004298   # threonine-type endopeptidase activity
      - GO:0043161   # proteasome-mediated ubiquitin-dependent protein catabolic process
      - GO:0000502   # proteasome complex
    patterns:
      # Probe names: "Suc-LLVY" / "SucLLVY" and "LLVY-AMC" / "LLVYAMC"
      # (presumably both refer to the same fluorogenic peptide -- confirm).
      - 'Suc-?LLVY'
      - 'LLVY-?AMC'
      - 'chymotrypsin-?like\s+(activity|peptidase)'
      # "proteasome activity" or "proteasome peptidase activity".
      - 'proteasome\s+(peptidase\s+)?activity'
      # Every hit of this pattern is already a hit of the one above; it adds no
      # new matches.
      - '20S\s+proteasome\s+activity'

  # Direct (molecular, low-convergence) readout of methyltransferase catalytic
  # activity measured in vitro. Demethylase assays report the opposite activity
  # and are kept out of the patterns below.
  METHYLTRANSFERASE_ACTIVITY:
    reports_state_of: methyltransferase catalytic activity (in vitro)
    proximity: molecular
    convergence: low
    # Methyltransferase label fragments plus the bare process word
    # "methylation"; presumably matched against GO term labels -- confirm.
    aligned_label_regex: 'methyltransferase activity|histone methyltransferase|DNA methyltransferase|N-methyltransferase|O-methyltransferase|protein.{0,15}methyltransferase|methylation'
    commonly_overmapped_to:
      - GO:0008168   # methyltransferase activity
      - GO:0042054   # histone methyltransferase activity
      - GO:0008170   # N-methyltransferase activity
      - GO:0032259   # methylation
    patterns:
      - 'methyltransferase\s+(assay|activity)'
      # Enzyme-family abbreviations followed by "assay" / "activity".
      - '(DNMT|PRMT|HMTase)\s+(assay|activity)'
      # Left boundary keeps the short token "HMT" from matching inside words.
      - '\bHMT\s+(assay|activity)'
      # "[3H]SAM", "[3H]-SAM", "tritiated SAM".
      - '(\[3H\]|tritiated)\s*-?SAM'
      # The leading group rejects "demethylation assay" and "de-methylation
      # assay" (preceding "e" or "e-") while still accepting "methylation assay",
      # "trimethylation assay" and "N-methylation assay". It is written without
      # lookbehind so the pattern stays portable across regex engines.
      - '(^|[^e-]|[^e]-)methylation\s+assay'

  # Direct (molecular, low-convergence) readout of acetyltransferase (HAT) or
  # deacetylase (HDAC) activity measured in vitro.
  ACETYLTRANSFERASE_DEACETYLASE:
    reports_state_of: acetyltransferase (HAT) / deacetylase (HDAC) activity (in vitro)
    proximity: molecular
    convergence: low
    # Acetyltransferase / deacetylase label fragments plus the bare process
    # word "acetylation"; presumably matched against GO term labels -- confirm.
    aligned_label_regex: 'acetyltransferase activity|histone acetyltransferase|deacetylase activity|histone deacetylase|protein deacetylase|NAD-dependent.{0,18}deacetylase|N-acetyltransferase|acetylation'
    commonly_overmapped_to:
      - GO:0004402   # histone acetyltransferase activity
      - GO:0016407   # acetyltransferase activity
      - GO:0004407   # histone deacetylase activity
      - GO:0033558   # protein lysine deacetylase activity
    patterns:
      # Matching is case-insensitive, so an unanchored "HAT" would fire on
      # ordinary prose such as "that activity" or "what assay". The left
      # boundary applies to the abbreviation only; "acetyltransferase" stays
      # unanchored so "N-acetyltransferase activity" and similar still match.
      - '(\bHAT|acetyltransferase)\s+(assay|activity)'
      - '(HDAC|deacetylase)\s+(assay|activity)'
      # "in vitro acetylation assay" / "in vitro deacetylation assay".
      - 'in\s*vitro\s+(de)?acetylation\s+assay'
      - 'deacetylation\s+assay'
      # Product name with flexible separators: "Fluor de Lys", "Fluor-de-Lys".
      - 'fluor[- ]?de[- ]?lys'

  # Direct (molecular, low-convergence) readout of phosphatase catalytic
  # activity measured in vitro.
  PHOSPHATASE_ACTIVITY:
    reports_state_of: phosphatase catalytic activity (in vitro)
    proximity: molecular
    convergence: low
    # Phosphatase label fragments and the stem "dephosphorylat"; presumably
    # matched against GO term labels -- confirm against the consumer.
    aligned_label_regex: 'phosphatase activity|phosphoprotein phosphatase|protein tyrosine phosphatase|protein serine/threonine phosphatase|phosphoric.{0,12}hydrolase|dephosphorylat'
    commonly_overmapped_to:
      - GO:0016791   # phosphatase activity
      - GO:0004721   # phosphoprotein phosphatase activity
      - GO:0004725   # protein tyrosine phosphatase activity
      - GO:0004722   # protein serine/threonine phosphatase activity
    patterns:
      # Matches "phosphatase assay" and "phosphatase activity assay"; a bare
      # "phosphatase activity" (no "assay") is deliberately not enough.
      - 'phosphatase\s+(assay|activity\s+assay)'
      # Substrate by abbreviation or spelled out ("p-nitrophenyl phosphate",
      # "pnitrophenylphosphate").
      - '(pNPP|p-?nitrophenyl\s*phosphate)'
      # NOTE(review): reagent name only -- matches any mention of malachite
      # green; confirm it is specific enough to phosphatase assays.
      - 'malachite\s+green'
      - 'dephosphorylation\s+assay'
      - 'in\s*vitro\s+dephosphorylation'

  # Direct (molecular, low-convergence) readout of DNA/RNA polymerase or
  # nucleotidyltransferase activity measured in vitro.
  POLYMERASE_ACTIVITY:
    reports_state_of: DNA/RNA polymerase / nucleotidyltransferase activity (in vitro)
    proximity: molecular
    convergence: low
    # Polymerase / nucleotidyltransferase label fragments; presumably matched
    # against GO term labels -- confirm against the consumer.
    aligned_label_regex: 'polymerase activity|DNA-directed DNA polymerase|DNA-directed RNA polymerase|RNA polymerase|nucleotidyltransferase|RNA-directed DNA polymerase|reverse transcriptase'
    commonly_overmapped_to:
      - GO:0003887   # DNA-directed DNA polymerase activity
      - GO:0003899   # DNA-directed RNA polymerase activity
      - GO:0016779   # nucleotidyltransferase activity
      - GO:0003964   # RNA-directed DNA polymerase activity
    patterns:
      # Requires "assay" or "activity" right after "polymerase", so phrases such
      # as "polymerase chain reaction" do not match.
      - 'polymerase\s+(assay|activity)'
      - 'primer\s+extension'
      # "in vitro transcription assay" / "in vitro replication assay".
      - 'in\s*vitro\s+(transcription|replication)\s+assay'
      - 'nucleotide\s+incorporation\s+assay'
      - 'strand\s+displacement\s+(synthesis|assay)'
      - 'reverse\s+transcriptase\s+activity'

  # Direct (molecular, low-convergence) readout of helicase unwinding activity
  # measured in vitro.
  HELICASE_ACTIVITY:
    reports_state_of: helicase unwinding activity (in vitro)
    proximity: molecular
    convergence: low
    # Helicase / duplex-unwinding label fragments; presumably matched against GO
    # term labels -- confirm against the consumer.
    aligned_label_regex: 'helicase activity|DNA helicase|RNA helicase|ATP-dependent.{0,12}helicase|DNA duplex unwind|four-way junction helicase'
    commonly_overmapped_to:
      - GO:0004386   # helicase activity
      - GO:0003678   # DNA helicase activity
      - GO:0003724   # RNA helicase activity
    patterns:
      - 'helicase\s+(assay|activity)'
      # "unwinding assay" / "unwinding activity".
      - 'unwinding\s+(assay|activity)'
      - 'strand\s+separation\s+assay'
      # No trailing noun required: any "duplex unwinding" mention matches.
      - 'duplex\s+unwinding'

  # RNA CLIP/RIP/REMSA give DIRECT evidence of RNA binding -- the RNA counterpart
  # to CHROMATIN_CHIP. Licenses an MF RNA-binding term.
  RNA_BINDING_CLIP:
    reports_state_of: direct RNA binding (CLIP / RIP / RNA-EMSA)
    proximity: molecular
    convergence: low
    # RNA-binding label fragments; presumably matched against GO term labels --
    # confirm against the consumer.
    aligned_label_regex: 'RNA binding|mRNA binding|single-stranded RNA binding|double-stranded RNA binding|rRNA binding|tRNA binding|mRNA 3.{0,4}UTR binding|miRNA binding|pre-mRNA.{0,10}binding'
    commonly_overmapped_to:
      - GO:0003723   # RNA binding
      - GO:0003729   # mRNA binding
      - GO:0003725   # double-stranded RNA binding
      - GO:0019843   # rRNA binding
    patterns:
      # CLIP family. "CLIP-seq" is left unanchored so prefixed variants match;
      # the short names iCLIP / eCLIP are bounded on both sides.
      - 'CLIP-?seq'
      - 'HITS-?CLIP'
      - 'PAR-?CLIP'
      - '\biCLIP\b'
      - '\beCLIP\b'
      - 'RNA\s+immunoprecipitation'
      # Matching is case-insensitive, so an unanchored "RIP" would fire inside
      # any word ending in "rip": "DRIP-seq" / "DRIP-qPCR" (a different
      # technique), "strip assay", "drip assay". The left boundary restricts
      # both patterns to the standalone abbreviation; prefixed spellings are
      # expected to be caught via 'RNA\s+immunoprecipitation' instead.
      - '\bRIP-?(seq|qPCR)'
      - '\bRIP\s+assay'
      # "RNA pulldown" / "RNA pull-down".
      - 'RNA\s+pull-?down'
      - 'RNA\s+EMSA'
      - 'filter\s+binding\s+assay'

  # Direct (molecular, low-convergence) readout of transmembrane transporter
  # activity, measured as uptake of a tracer substrate.
  TRANSPORTER_UPTAKE:
    reports_state_of: transmembrane transporter activity (tracer substrate uptake)
    proximity: molecular
    convergence: low
    # Transporter-activity label fragments; presumably matched against GO term
    # labels -- confirm against the consumer.
    aligned_label_regex: 'transmembrane transporter activity|symporter|antiporter|transporter activity|amino acid transmembrane transport|neurotransmitter.{0,15}transport|bile acid.{0,12}transport|organic anion transport'
    commonly_overmapped_to:
      - GO:0022857   # transmembrane transporter activity
      - GO:0015171   # amino acid transmembrane transporter activity
      - GO:0005215   # transporter activity
      - GO:0006810   # transport
    patterns:
      # A named substrate immediately followed by "uptake".
      - '(glutamate|GABA|dopamine|serotonin|noradrenaline|taurocholate|bile\s+acid|cystine|glutamine|leucine|arginine|choline)\s+uptake'
      # "radiolabeled uptake", "radiolabelled substrate uptake", ...
      - 'radiolabel\w*\s+(substrate\s+)?uptake'
      - 'substrate\s+uptake\s+assay'
      # Sodium dependence written as "Na-dependent", "Na+-dependent",
      # "Na(+)-dependent" or "Na+ dependent". Matching is case-insensitive, so
      # the left boundary is needed to keep "Na" from firing inside
      # "DNA-dependent transport" / "RNA-dependent transport".
      - '\bNa(\+|\(\+\))?[- ]?dependent\s+(uptake|transport)'
      # Bracketed radiolabel directly on a named substrate: "[3H]glutamate",
      # "[14C]-leucine", "[3H] GABA".
      - '\[(3H|14C)\][- ]?(glutamate|GABA|dopamine|serotonin|taurocholate|leucine|glutamine|cystine|choline)'

  # Direct (molecular, low-convergence) readout of chaperone holdase / foldase
  # activity: refolding and aggregation-prevention assays.
  CHAPERONE_REFOLDING:
    reports_state_of: chaperone holdase / foldase (refolding, aggregation prevention)
    proximity: molecular
    convergence: low
    # Chaperone / protein-folding label fragments; presumably matched against GO
    # term labels -- confirm against the consumer.
    aligned_label_regex: 'unfolded protein binding|chaperone|protein refolding|ATP-dependent protein folding|misfolded protein binding|heat shock protein binding|chaperone-mediated protein folding'
    commonly_overmapped_to:
      - GO:0051082   # unfolded protein binding
      - GO:0006457   # protein folding
      - GO:0061077   # chaperone-mediated protein folding
      - GO:0042026   # protein refolding
    patterns:
      # "in vitro folding assay" / "in vitro refolding assay".
      - 'in\s*vitro\s+(re)?folding\s+assay'
      - 'protein\s+refolding\s+assay'
      # Model-substrate assays named by their client protein.
      - 'luciferase\s+refolding'
      - 'citrate\s+synthase\s+(aggregation|thermal)'
      - 'aggregation\s+(prevention|suppression)\s+assay'
      - 'holdase'
      # Left boundary keeps "unfoldase" (an unfolding, not refolding, activity)
      # from matching; "foldase" and "holdase/foldase" still match.
      - '\bfoldase'
      - 'chaperone\s+activity\s+assay'
      - 'thermal\s+aggregation\s+assay'

  # Direct (molecular, low-convergence) readout of glycosyltransferase /
  # glycan-synthesis activity measured in vitro. Deglycosylation assays report
  # glycan removal and are kept out of the patterns below.
  GLYCOSYLTRANSFERASE_ACTIVITY:
    reports_state_of: glycosyltransferase / glycan-synthesis activity (in vitro)
    proximity: molecular
    convergence: low
    # Glycosyltransferase-family label fragments plus "protein glycosylation";
    # presumably matched against GO term labels -- confirm against the consumer.
    aligned_label_regex: 'glycosyltransferase|glucosyltransferase|galactosyltransferase|sialyltransferase|fucosyltransferase|mannosyltransferase|acetylglucosaminyltransferase|protein glycosylation'
    commonly_overmapped_to:
      - GO:0016757   # glycosyltransferase activity
      - GO:0008375   # acetylglucosaminyltransferase activity
      - GO:0008373   # sialyltransferase activity
      - GO:0006486   # protein glycosylation
    patterns:
      - 'glycosyltransferase\s+(assay|activity)'
      # Family-specific transferases: "sialyltransferase assay",
      # "fucosyltransferase activity", ...
      - '(sialyl|fucosyl|galactosyl|glucosyl|mannosyl)transferase\s+(assay|activity)'
      - 'in\s*vitro\s+glycosylation\s+assay'
      # The leading group rejects "deglycosylation assay" and "de-glycosylation
      # assay" (preceding "e" or "e-") while still accepting "glycosylation
      # assay", "N-glycosylation assay" and "O-glycosylation assay". It is
      # written without lookbehind so the pattern stays portable across regex
      # engines.
      - '(^|[^e-]|[^e]-)glycosylation\s+assay'
      # UDP-sugar donor with an optional bracketed label of up to six
      # characters, e.g. "UDP-[14C]glucose", "UDP-[3H]GlcNAc". The bracket part
      # is optional, so a plain "UDP-glucose" also matches.
      - 'UDP-?\[?[^\]]{0,6}\]?(glucose|galactose|GlcNAc|GalNAc)'
