{
  "filename": "plot_2.png",
  "iteration": 2,
  "description": "Kyte-Doolittle hydropathy profile and transmembrane helix prediction for Rv0311",
  "timestamp": "2026-06-22 00:38:47",
  "code": "\nimport matplotlib\nmatplotlib.use('Agg')\nimport matplotlib.pyplot as plt\nimport numpy as np\n\nsequence = \"MSQSRYAGLSRSELAVLLPELLLIGQLIDRSGMAWCIQAFGRQEMLQIAIEEWAGASPIYTKRMQKALNFEGDDVPTIFKGLQLDIGAPPQFMDFRFTLHDRWHGEFHLDHCGALLDVEPMGDDYVVGMCHTIEDPTFDATAIATNPRAQVRPIHRPPRKPADRHPHCAWTVIIDESYPEAEGIPALDAVRETKAATWELDNVDASDDGLVDYSGPLVSDLDFGAFSHSALVRMADEVCLQMHLLNLSFAIAVRKRAKADAQLAISVNTRQLIGVAGLGAERIHRAMALPGGIEGALGVLELHPLLNPAGYVLAETSPDRLVVHNSPAHADGAWISLCTPASVQPLQAIATAVDPHLKVRISGTDTDWTAELIEADAPASELPEVLVAKVSRGSVFQFEPRRSLPLTVK\"\n\n# Kyte-Doolittle hydropathy scale\nkd_scale = {\n    'A': 1.8, 'R': -4.5, 'N': -3.5, 'D': -3.5, 'C': 2.5,\n    'Q': -3.5, 'E': -3.5, 'G': -0.4, 'H': -3.2, 'I': 4.5,\n    'L': 3.8, 'K': -3.9, 'M': 1.9, 'F': 2.8, 'P': -1.6,\n    'S': -0.8, 'T': -0.7, 'W': -0.9, 'Y': -1.3, 'V': 4.2\n}\n\n# Compute Kyte-Doolittle hydropathy profile with window size 19\nwindow = 19\nhydropathy = []\npositions = []\nfor i in range(len(sequence) - window + 1):\n    window_seq = sequence[i:i+window]\n    score = np.mean([kd_scale.get(aa, 0) for aa in window_seq])\n    hydropathy.append(score)\n    positions.append(i + window // 2 + 1)\n\n# Also compute with window 7 for finer resolution\nhydropathy_7 = []\npositions_7 = []\nwindow7 = 7\nfor i in range(len(sequence) - window7 + 1):\n    window_seq = sequence[i:i+window7]\n    score = np.mean([kd_scale.get(aa, 0) for aa in window_seq])\n    hydropathy_7.append(score)\n    positions_7.append(i + window7 // 2 + 1)\n\n# Simple TM helix prediction: regions with hydropathy > 1.6 for window=19\n# whose run of window-centre positions spans at least 15 residues\ntm_regions = []\nin_tm = False\nstart = 0\nfor i, (pos, score) in enumerate(zip(positions, hydropathy)):\n    if score > 1.6 and not in_tm:\n        in_tm = True\n        start = pos\n    elif score <= 1.6 and in_tm:\n        if pos - start >= 15:\n            tm_regions.append((start, pos-1))\n        in_tm = False\nif in_tm and positions[-1] - start >= 15:\n    tm_regions.append((start, 
positions[-1]))\n\n# Signal peptide check: hydrophobic N-terminal region\n# Simple heuristic: mean hydropathy of residues 2-25 (sequence[1:25])\nn_term_hydropathy = np.mean([kd_scale.get(aa, 0) for aa in sequence[1:25]])\nprint(f\"=== TOPOLOGY ANALYSIS ===\")\nprint(f\"N-terminal region (residues 2-25) mean hydropathy: {n_term_hydropathy:.2f}\")\nprint(f\"  (>1.6 suggests signal peptide/TM; <0 suggests cytoplasmic)\")\n\n# Check for signal peptide-like features\n# Typical signal peptide: n-region (positive charge) -> h-region (hydrophobic) -> c-region (cleavage site)\nprint(f\"\\nFirst 40 residues: {sequence[:40]}\")\nprint(f\"Charges in first 10: \", end=\"\")\nfor aa in sequence[:10]:\n    if aa in ('R', 'K', 'H'):\n        print(f\"+({aa})\", end=\" \")\n    elif aa in ('D', 'E'):\n        print(f\"-({aa})\", end=\" \")\nprint()\n\n# Detailed hydropathy for N-terminal\nprint(f\"\\nResidue-by-residue hydropathy (first 40):\")\nfor i, aa in enumerate(sequence[:40]):\n    bar = \"#\" * max(0, int((kd_scale.get(aa, 0) + 4.5) * 2))\n    print(f\"  {i+1:3d} {aa} {kd_scale.get(aa, 0):5.1f} {bar}\")\n\nprint(f\"\\nPredicted TM regions (window=19, threshold=1.6):\")\nif tm_regions:\n    for start, end in tm_regions:\n        print(f\"  Residues {start}-{end}: {sequence[start-1:end]}\")\nelse:\n    print(\"  None detected\")\n\n# Check for hydrophobic stretches\nprint(f\"\\nHydrophobic stretches (window=7, score > 2.0):\")\nstretches = []\nin_stretch = False\nfor i, (pos, score) in enumerate(zip(positions_7, hydropathy_7)):\n    if score > 2.0 and not in_stretch:\n        in_stretch = True\n        start = pos\n    elif score <= 2.0 and in_stretch:\n        stretches.append((start, pos-1, max(hydropathy_7[max(0,i-10):i+1])))\n        in_stretch = False\nif in_stretch:\n    stretches.append((start, positions_7[-1], max(hydropathy_7[-10:])))\n\nfor s, e, peak in stretches:\n    print(f\"  Residues {s}-{e} (peak={peak:.2f}): {sequence[s-1:e]}\")\n\n# Plot\nfig, axes = plt.subplots(2, 
1, figsize=(14, 8))\n\nax1 = axes[0]\nax1.set_title('Rv0311 Kyte-Doolittle Hydropathy Profile (window=19)', fontsize=12, fontweight='bold')\nax1.fill_between(positions, hydropathy, 0, where=[h > 0 for h in hydropathy], \n                  color='#FF9999', alpha=0.5, label='Hydrophobic')\nax1.fill_between(positions, hydropathy, 0, where=[h <= 0 for h in hydropathy], \n                  color='#9999FF', alpha=0.5, label='Hydrophilic')\nax1.plot(positions, hydropathy, 'k-', linewidth=1)\nax1.axhline(y=1.6, color='red', linestyle='--', alpha=0.5, label='TM threshold (1.6)')\nax1.axhline(y=0, color='gray', linestyle='-', alpha=0.3)\n\n# Mark TM regions\nfor start, end in tm_regions:\n    ax1.axvspan(start, end, color='red', alpha=0.2)\n\n# Mark His residues\nhis_positions_seq = [i+1 for i, aa in enumerate(sequence) if aa == 'H']\nfor hp in his_positions_seq:\n    ax1.axvline(x=hp, color='blue', alpha=0.1, linewidth=0.5)\n\nax1.set_xlabel('Residue Position')\nax1.set_ylabel('Hydropathy Score')\nax1.set_xlim(1, len(sequence))\nax1.legend(loc='upper right', fontsize=8)\n\n# Panel 2: Fine-resolution with intein feature positions marked\nax2 = axes[1]\nax2.set_title('Hydropathy Profile (window=7) with Key Residue Annotations', fontsize=12, fontweight='bold')\nax2.fill_between(positions_7, hydropathy_7, 0, where=[h > 0 for h in hydropathy_7],\n                  color='#FF9999', alpha=0.5)\nax2.fill_between(positions_7, hydropathy_7, 0, where=[h <= 0 for h in hydropathy_7],\n                  color='#9999FF', alpha=0.5)\nax2.plot(positions_7, hydropathy_7, 'k-', linewidth=0.8)\nax2.axhline(y=0, color='gray', linestyle='-', alpha=0.3)\n\n# Mark Cys residues\ncys_pos = [i+1 for i, aa in enumerate(sequence) if aa == 'C']\nfor cp in cys_pos:\n    ax2.axvline(x=cp, color='orange', alpha=0.5, linewidth=1.5, linestyle='--')\n    ax2.annotate(f'C{cp}', xy=(cp, max(hydropathy_7)*0.9), fontsize=7, color='orange', ha='center')\n\n# Mark His residues\nfor hp in his_positions_seq:\n    
ax2.axvline(x=hp, color='blue', alpha=0.3, linewidth=1)\n\n# Mark the HxxH motif region\nax2.axvspan(108, 111, color='green', alpha=0.2, label='HxxH (108-111)')\n\nax2.set_xlabel('Residue Position')\nax2.set_ylabel('Hydropathy Score')\nax2.set_xlim(1, len(sequence))\nax2.legend(loc='upper right', fontsize=8)\n\nplt.tight_layout()\nplt.savefig('hydropathy_profile.png', dpi=150, bbox_inches='tight')\nplt.show()\nprint(\"\\nFigure saved: hydropathy_profile.png\")\n",
  "plot_number": 2
}