{
  "filename": "plot_3.png",
  "iteration": 2,
  "description": "Multi-species active-site alignment visualization showing key motif conservation in RvY_17310",
  "timestamp": "2026-06-22 00:57:48",
  "code": "\nimport matplotlib.pyplot as plt\nimport matplotlib.patches as mpatches\nimport numpy as np\n\n# Create a comprehensive multi-species alignment of the key active-site region\n# Using the known alignments from BLAST and manual alignment\n\n# Key region around Cu-binding His-x-His and adjacent residues\n# Human SOD1 (P00441) positions 42-70 (UniProt numbering)\n\n# Extracted active-site regions (manually aligned from known structures)\n# Format: species, accession, region1 (around His-x-His, ~pos 43-55), region2 (around His64-72)\n# Human SOD1: GLHGFHVHEFGDNTAGC...PHFNPLSRKH...EERHVGDLGN...HCIIGRTLVVHEKADDLGKGG...RLACGVIGIA\n# RvY_17310:  GDHGIHIHSFGDLSRGCE...PHFNPEGNHH...YQAHAGDLGN...TLVVGRTLVIHEKPDDLGRGG...RLACGIIGVS\n\n# Build an alignment display for the key motif regions\nfig, ax = plt.subplots(figsize=(18, 10))\nax.axis('off')\n\n# Title\nax.text(0.5, 0.98, 'Active-Site Sequence Conservation: RvY_17310 vs Characterized Cu/Zn-SODs', \n        transform=ax.transAxes, fontsize=14, fontweight='bold', ha='center', va='top')\nax.text(0.5, 0.95, 'Key metal-binding motifs from domain-extracted alignments', \n        transform=ax.transAxes, fontsize=10, ha='center', va='top', color='gray')\n\n# Define the three key motif regions to display\n# Region A: Cu-binding His-x-His (human ~44-56)\n# Region B: PHFN motif + Zn-binding (human ~64-84)  \n# Region C: Cu-binding His120 + ESL (human ~118-147)\n\nregions = {\n    'Region A: Cu-binding motif': {\n        'Human SOD1':   'GLHGFHVHEFGDNTAGC',\n        'Bovine SOD1':  'GDHGFHVHQFGDNTQGC',\n        'Drosophila':   'GLHGFHVHEFGDNTNGC',\n        'RvY_17310':    'GDHGIHIHSFGDLSRGCE',\n        'RvY_13070*':   'GDHGIVINSFGDLTTGCQ',  # 9/10 - His49->Val\n    },\n    'Region B: Zn-binding + PHFN': {\n        'Human SOD1':   'PHFNPLSRKHGGPKDEERHVGDLGN',\n        'Bovine SOD1':  'PHFNPLSKKHGGPKDEERHVGDLGN',\n        'Drosophila':   'PHFNPYGKEHGAPVDENRHLGDLGN',\n        'RvY_17310':    'PHFNPEGNHHGSPDDVQYQAHAGDLGN',\n        'RvY_13070*':   'PHFNKQGGQHGEPDDQRYQAHASDLGN',\n    },\n    'Region C: ESL + C-terminus': {\n        'Human SOD1':   'RTLVVHEKADDLGKGGNEESTKTGNAGSRLACGVIGIAQ',\n        'Bovine SOD1':  'RTMVVHEKPDDLGRGGNEESTKTGNAGSRLACGVIGIAK',\n        'Drosophila':   'RTVVVHADADDLGQGGHELSKSTGNAGARIGCGVIGIAKV',\n        'RvY_17310':    'RTLVIHEKPDDLGRGGDQTSRETGNSGARLACGIIGVSH-',\n        'RvY_13070*':   'RTLVVHEKPDDLGRGGEQESRETGNSGARLACGIIGISK-',\n    },\n}\n\n# Color scheme for residues\ndef get_color(aa, is_key_pos):\n    if is_key_pos:\n        return '#FF4444'  # red for key positions\n    elif aa == 'H':\n        return '#4488FF'  # blue for histidine\n    elif aa == 'D' or aa == 'E':\n        return '#FF8844'  # orange for acidic\n    elif aa == 'K' or aa == 'R':\n        return '#44BB44'  # green for basic\n    elif aa == 'C':\n        return '#FFDD44'  # yellow for cysteine\n    else:\n        return '#CCCCCC'  # gray for others\n\ny = 0.88\nfontsize = 10\nmonospace = 'monospace'\n\nfor region_name, seqs in regions.items():\n    ax.text(0.02, y, region_name, transform=ax.transAxes, fontsize=11, fontweight='bold', va='top')\n    y -= 0.03\n    \n    for species, seq in seqs.items():\n        # Species label\n        style = 'italic' if '*' not in species else 'normal'\n        color = 'red' if species == 'RvY_17310' else 'black'\n        weight = 'bold' if species == 'RvY_17310' else 'normal'\n        \n        ax.text(0.02, y, f'{species:>16s}  ', transform=ax.transAxes, fontsize=fontsize,\n                fontfamily=monospace, va='top', color=color, fontweight=weight)\n        \n        # Sequence with colored residues\n        x_start = 0.18\n        for i, aa in enumerate(seq):\n            # Determine if this is a key position (His, Cys, Asp at binding sites)\n            is_key = False\n            if species == 'RvY_17310':\n                is_key = aa in 'HCDR' and aa != '-'  # Highlight key residue types\n            \n            bg_color = 'white'\n            if aa == 'H':\n                bg_color = '#CCE5FF'\n            elif aa == 'C':\n                bg_color = '#FFFFCC'\n            elif aa in 'DE':\n                bg_color = '#FFE5CC'\n            elif aa in 'KR':\n                bg_color = '#CCFFCC'\n            \n            txt_color = 'red' if species == 'RvY_17310' and aa == 'H' else 'black'\n            \n            ax.text(x_start + i * 0.017, y, aa, transform=ax.transAxes, fontsize=fontsize-1,\n                    fontfamily=monospace, va='top', color=txt_color, fontweight='bold' if aa in 'HCDR' else 'normal',\n                    bbox=dict(boxstyle='square,pad=0.1', facecolor=bg_color, edgecolor='none') if aa != '-' else {})\n        \n        y -= 0.028\n    \n    y -= 0.015\n\n# Legend\ny -= 0.01\nlegend_items = [\n    ('His (H)', '#CCE5FF', '#4488FF'),\n    ('Cys (C)', '#FFFFCC', '#BBBB00'),\n    ('Asp/Glu (D/E)', '#FFE5CC', '#FF8844'),\n    ('Lys/Arg (K/R)', '#CCFFCC', '#44BB44'),\n]\nax.text(0.02, y, 'Legend:', transform=ax.transAxes, fontsize=10, fontweight='bold', va='top')\ny -= 0.025\nfor label, bg, fg in legend_items:\n    ax.text(0.04, y, '  ', transform=ax.transAxes, fontsize=10, fontfamily=monospace, va='top',\n            bbox=dict(boxstyle='square,pad=0.15', facecolor=bg, edgecolor='gray'))\n    ax.text(0.07, y, label, transform=ax.transAxes, fontsize=9, va='top')\n    y -= 0.025\n\nax.text(0.02, y-0.01, '* RvY_13070 has His49\u2192Val substitution at Cu-binding site (9/10 residues conserved)',\n        transform=ax.transAxes, fontsize=9, va='top', color='gray', style='italic')\nax.text(0.02, y-0.035, 'RvY_17310 (TARGET): All metal-binding His, Asp, Cys, and Arg residues are conserved',\n        transform=ax.transAxes, fontsize=10, va='top', color='red', fontweight='bold')\n\nplt.tight_layout()\nplt.savefig('alignment_comparison.png', dpi=150, bbox_inches='tight')\nplt.show()\nprint(\"Figure saved: alignment_comparison.png\")\n",
  "plot_number": 3
}