{
  "filename": "octs1_analysis.png",
  "iteration": 1,
  "description": "Generate a four-panel OCTS1 summary figure: amino acid composition, relative GST activity, an active-site residue comparison table, and an evidence summary",
  "timestamp": "2026-06-23 21:06:18",
  "code": "\nimport matplotlib\nmatplotlib.use('Agg')\nimport matplotlib.pyplot as plt\nimport numpy as np\n\n# Create a comprehensive summary figure\n\nfig, axes = plt.subplots(2, 2, figsize=(14, 10))\nfig.suptitle('OCTS1 (S-crystallin 1, P27013) \u2014 Pseudo-enzyme Analysis', fontsize=14, fontweight='bold')\n\n# Panel A: Amino acid composition comparison\nax = axes[0, 0]\naas = ['W', 'Y', 'C', 'M', 'S', 'R', 'K', 'L', 'E', 'A']\nocts1_seq = \"MPSYTLHYFNHRGRARSAVCCSQLLVSSTMTADRVFRMGQHEKQDAMSHDANVGIGQQNPNSPEYAMARYLAREFGFHGRNNMEMARVDFISDCFYDILDDYMRMYQDGNCRMMFQRSRDMSSSSEKRMRFQETCRRILPFMERTLEMYNGGSQYFMGDQMTMADMMCYCALENPLMEEPSMLSSYPKLMALRNRVMNHSKMSSYLQRRCRTDF\"\nsqgst_seq = \"MPKYTLHYFPLMGRAELCRFVLAAHGEEFTDRVVEMADWPNLKATMYSNAMPVLDIDGTKMSQSMCIARHLAREFGLDGKYLMQPQVDLTNLYQKILDQYEEFLTKNPRLAFMQARASRSRYEERKRWFHETAEKFLDLIEQNLVMLPQGELFAGINQMMAEQMAFYQRLEAPLMEQPTFIVLQPKLLALKEKINHEVYASYLVRKAETF\"\n\nocts1_pct = [octs1_seq.count(aa)/len(octs1_seq)*100 for aa in aas]\nsqgst_pct = [sqgst_seq.count(aa)/len(sqgst_seq)*100 for aa in aas]\n\nx = np.arange(len(aas))\nwidth = 0.35\nbars1 = ax.bar(x - width/2, octs1_pct, width, label='OCTS1 (S-crystallin)', color='#e74c3c', alpha=0.8)\nbars2 = ax.bar(x + width/2, sqgst_pct, width, label='Squid \u03c3-GST (active)', color='#3498db', alpha=0.8)\nax.set_xlabel('Amino Acid')\nax.set_ylabel('Frequency (%)')\nax.set_title('A. 
Composition: S-crystallin vs Active GST')\nax.set_xticks(x)\nax.set_xticklabels(aas)\nax.legend(fontsize=8)\n# Highlight W=0 for OCTS1\nax.annotate('W = 0!', xy=(0 - width/2, 0.1), fontsize=9, fontweight='bold', color='red', ha='center')\n\n# Panel B: Activity comparison (bar chart)\nax = axes[0, 1]\nproteins = ['Squid \u03c3-GST\\n(P46088)', 'OCTS1\\n(S-crystallin 1)', 'OCTS1 mutant\\n(4 reversions)']\n# Relative activity (squid GST = 100%)\nactivities = [100, 0.1, 80]  # Approximate from literature\ncolors = ['#3498db', '#e74c3c', '#2ecc71']\nbars = ax.bar(proteins, activities, color=colors, alpha=0.8, edgecolor='black')\nax.set_ylabel('Relative GST Activity (%)')\nax.set_title('B. GST Activity Comparison')\nax.set_yscale('log')\nax.set_ylim(0.01, 200)\nax.axhline(y=1, color='gray', linestyle='--', alpha=0.5, label='1% threshold')\nax.legend(fontsize=8)\n# Add value labels\nfor bar, val in zip(bars, activities):\n    ax.text(bar.get_x() + bar.get_width()/2., bar.get_height() * 1.3, \n            f'{val}%', ha='center', va='bottom', fontweight='bold', fontsize=10)\n\n# Panel C: Key residue conservation table\nax = axes[1, 0]\nax.axis('off')\ntable_data = [\n    ['Residue', 'Squid \u03c3-GST', 'OCTS1', 'Role', 'Status'],\n    ['Tyr4', 'Y4', 'Y4', 'Catalytic (GSH activation)', '\u2713 Conserved'],\n    ['Tyr8', 'Y8', 'Y8', 'Active site', '\u2713 Conserved'],\n    ['Trp39', 'W39', 'H41', 'Substrate binding/activity', '\u2717 LOST (\u2192His)'],\n    ['Trp128', 'W128', '(absent)', 'Activity', '\u2717 LOST'],\n    ['Insertion', '\u2014', '11-aa insert', 'Occludes active site', '\u2717 Blocks access'],\n]\ntable = ax.table(cellText=table_data[1:], colLabels=table_data[0], \n                  cellLoc='center', loc='center')\ntable.auto_set_font_size(False)\ntable.set_fontsize(9)\ntable.scale(1, 1.5)\n# Color the header\nfor j in range(len(table_data[0])):\n    table[0, j].set_facecolor('#34495e')\n    table[0, j].set_text_props(color='white', fontweight='bold')\n# Color 
status column\nfor i in range(1, len(table_data)):\n    if '\u2717' in table_data[i][-1]:\n        table[i, 4].set_facecolor('#fadbd8')\n    else:\n        table[i, 4].set_facecolor('#d5f5e3')\nax.set_title('C. Active-Site Residue Conservation', fontweight='bold', pad=20)\n\n# Panel D: Evidence summary\nax = axes[1, 1]\nax.axis('off')\nevidence_text = \"\"\"VERDICT: Pseudo-enzyme / Activity Lost\n(TreeGrafter Failure Mode #2)\n\nSeed hypothesis: GO:0004364 (glutathione transferase activity)\nStatus: REFUTED\n\nEvidence:\n\u2022 kcat 1000\u00d7 lower than conspecific GST (PMID:8827456)\n\u2022 Crystal structure shows altered active site (PMID:27499004)\n\u2022 4 reversal mutations restore GST activity (PMID:27499004)\n\u2022 11-aa insertion occludes active center (PMID:9929473)\n\u2022 Critical W39 completely absent (this analysis)\n\u2022 Expressed exclusively in lens tissue (PMID:7639695)\n\nCorrect annotation:\n  GO:0005212 (structural constituent of eye lens)\n\nCuration action: REMOVE GO:0004364; retain GO:0005212\"\"\"\n\nax.text(0.05, 0.95, evidence_text, transform=ax.transAxes, fontsize=9,\n        verticalalignment='top', fontfamily='monospace',\n        bbox=dict(boxstyle='round', facecolor='lightyellow', alpha=0.8))\nax.set_title('D. Executive Summary', fontweight='bold')\n\nplt.tight_layout()\nplt.savefig('octs1_analysis.png', dpi=150, bbox_inches='tight')\nprint(\"Figure saved: octs1_analysis.png\")\n"
}