{
  "filename": "plot_2.png",
  "iteration": 2,
  "description": "Analyze HSPA12B AlphaFold structure: secondary structure, domain contacts, and comparison to canonical HSP70 SBD architecture",
  "timestamp": "2026-06-22 01:26:26",
  "code": "\nimport requests\nimport numpy as np\nimport matplotlib.pyplot as plt\n\n# HSPA12B AlphaFold model analysis: estimate secondary structure from CA-CA\n# distances, compare the C-terminal region with the canonical HSP70 SBD,\n# measure inter-region centroid distances and save a three-panel figure.\n\n# Distance cutoffs in Angstroms, shared by the classifier and the figure so the\n# plotted guide lines always match the thresholds that are actually applied.\nHELIX_D3_MAX = 5.7          # CA(i)-CA(i+3) below this -> helix-like\nSHEET_D2_MIN = 6.0          # CA(i)-CA(i+2) strictly inside (MIN, MAX) -> extended\nSHEET_D2_MAX = 7.5\nEXTENDED_D3_TYPICAL = 10.0  # approximate CA(i)-CA(i+3) spacing along a beta strand\nSBD_MATCH_TOLERANCE = 15    # percentage points allowed when matching an SBD profile (heuristic)\n\n# Region boundaries (inclusive residue numbers), defined once and reused by the\n# text report and by every plot panel.\n# (report name, short plot label, first residue, last residue, shading color)\nREGIONS = [\n    (\"N-terminal (1-59)\", \"N-ext\", 1, 59, 'red'),\n    (\"NBD Lobe I (60-250)\", \"NBD I\", 60, 250, 'blue'),\n    (\"Insert (251-312)\", \"Insert\", 251, 312, 'gray'),\n    (\"NBD Lobe II (313-529)\", \"NBD II\", 313, 529, 'green'),\n    (\"C-terminal (530-686)\", \"C-term\", 530, 686, 'purple'),\n]\n\n# Download AlphaFold PDB for HSPA12B\npdb_url = \"https://alphafold.ebi.ac.uk/files/AF-Q96MM6-F1-model_v6.pdb\"\nresponse = requests.get(pdb_url, timeout=60)\n# Fail loudly on an HTTP error instead of parsing an error page as an empty structure\nresponse.raise_for_status()\npdb_text = response.text\n\n# Parse atomic data from the fixed-column PDB ATOM records\nresidue_data = {}  # resnum -> {resname, plddt, atoms, ca}\nfor line in pdb_text.splitlines():\n    if not line.startswith('ATOM'):\n        continue\n    atom_name = line[12:16].strip()\n    resname = line[17:20].strip()\n    resnum = int(line[22:26].strip())\n    xyz = np.array([float(line[30:38]), float(line[38:46]), float(line[46:54])])\n    bfactor = float(line[60:66])  # stored below as the residue's pLDDT\n\n    entry = residue_data.setdefault(resnum, {'resname': resname, 'plddt': bfactor, 'atoms': {}})\n    entry['atoms'][atom_name] = xyz\n    if atom_name == 'CA':\n        entry['ca'] = xyz\n\nresidues = sorted(residue_data)\nca_coords = {r: residue_data[r]['ca'] for r in residues if 'ca' in residue_data[r]}\nif not ca_coords:\n    raise ValueError(f\"No CA atoms could be parsed from {pdb_url}\")\n\n# ============================================================\n# SECONDARY STRUCTURE ESTIMATION from CA distances\n# ============================================================\n# Use CA-CA distance patterns to estimate secondary structure:\n# Alpha helix: compact, CA(i) to CA(i+3) distance ~5.0-5.5A\n# Beta sheet: extended, CA(i) to CA(i+2) distance ~6.5-7.0A\n# Coil: variable\n\ndef ca_offset_distances(offset):\n    \"\"\"Return {i: |CA(i) - CA(i+offset)|} for residues where both CA atoms exist.\"\"\"\n    return {r: np.linalg.norm(ca_coords[r] - ca_coords[r + offset])\n            for r in residues if r in ca_coords and r + offset in ca_coords}\n\nhelix_distances = ca_offset_distances(3)  # i to i+3 (helix indicator)\nsheet_distances = ca_offset_distances(2)  # i to i+2 (sheet indicator)\n\n# Simple secondary structure assignment; the state is attached to residue i, the\n# first residue of each distance window. Residues without an i+3 partner are coil.\nss_assignment = {}\nfor r in residues:\n    d3 = helix_distances.get(r)\n    d2 = sheet_distances.get(r, 10)\n    if d3 is None:\n        ss_assignment[r] = 'C'\n    elif d3 < HELIX_D3_MAX:  # helix-like\n        ss_assignment[r] = 'H'\n    elif SHEET_D2_MIN < d2 < SHEET_D2_MAX:  # extended/sheet-like\n        ss_assignment[r] = 'E'\n    else:\n        ss_assignment[r] = 'C'\n\n# Count secondary structure by region\nprint(\"=\" * 80)\nprint(\"SECONDARY STRUCTURE ESTIMATION FROM AlphaFold CA GEOMETRY\")\nprint(\"=\" * 80)\n\nfor region_name, _, start, end, _ in REGIONS:\n    region_ss = [ss_assignment.get(r, 'C') for r in range(start, end+1)]\n    h_count = region_ss.count('H')\n    e_count = region_ss.count('E')\n    c_count = region_ss.count('C')\n    total = len(region_ss)\n    print(f\"\\n{region_name}:\")\n    print(f\"  Helix: {h_count} ({h_count/total*100:.0f}%)\")\n    print(f\"  Sheet: {e_count} ({e_count/total*100:.0f}%)\")\n    print(f\"  Coil:  {c_count} ({c_count/total*100:.0f}%)\")\n\n    # Print SS string in rows of 60, each prefixed with its first residue number\n    ss_str = ''.join(region_ss)\n    for i in range(0, len(ss_str), 60):\n        print(f\"  {start + i:4d}: {ss_str[i:i+60]}\")\n\n# ============================================================\n# COMPARE C-terminal secondary structure to canonical HSP70 SBD\n# ============================================================\nprint(\"\\n\\n\" + \"=\" * 80)\nprint(\"COMPARISON: HSPA12B C-terminal vs Canonical HSP70 SBD Architecture\")\nprint(\"=\" * 80)\n\n# Canonical HSP70 SBD (HSPA8 ~395-640), approximate (helix %, sheet %):\n# SBD\u03b2 (395-509): 8 \u03b2-strands forming a sandwich \u2192 ~60% sheet\n# SBD\u03b1 (510-640): 5 \u03b1-helices forming lid \u2192 ~70% helix\ncanonical_sbd = {\"SBD\u03b2\": (10, 60), \"SBD\u03b1\": (70, 5)}\n\n# HSPA12B C-terminal region (last entry of REGIONS)\n_, _, cterm_start, cterm_end, _ = REGIONS[-1]\nc_term_ss = [ss_assignment.get(r, 'C') for r in range(cterm_start, cterm_end + 1)]\nc_total = len(c_term_ss)\nc_h = c_term_ss.count('H')\nc_e = c_term_ss.count('E')\nc_c = c_term_ss.count('C')\nh_pct = c_h / c_total * 100\ne_pct = c_e / c_total * 100\n\n# Only report a match/mismatch that the numbers support, rather than asserting one\nsbd_matches = [name for name, (ref_h, ref_e) in canonical_sbd.items()\n               if abs(h_pct - ref_h) <= SBD_MATCH_TOLERANCE and abs(e_pct - ref_e) <= SBD_MATCH_TOLERANCE]\n\nprint(f\"\\n  Canonical HSP70 SBD\u03b2 (8 \u03b2-strands): ~60% sheet, ~10% helix\")\nprint(f\"  Canonical HSP70 SBD\u03b1 (5 \u03b1-helices): ~70% helix, ~5% sheet\")\nprint(f\"  HSPA12B C-terminal ({cterm_start}-{cterm_end}): {h_pct:.0f}% helix, {e_pct:.0f}% sheet, {c_c/c_total*100:.0f}% coil\")\nprint(f\"\\n  HSPA12B C-terminal is enriched in {'helix' if c_h > c_e else 'sheet'}-like structure\")\nif sbd_matches:\n    print(f\"  and the proportions are within {SBD_MATCH_TOLERANCE} points of the canonical {' / '.join(sbd_matches)} pattern\")\nelse:\n    print(f\"  but the proportions do not match either SBD\u03b2 or SBD\u03b1 patterns\")\n\n# ============================================================\n# CONTACT MAP ANALYSIS - Check for interdomain contacts\n# ============================================================\nprint(\"\\n\\n\" + \"=\" * 80)\nprint(\"DOMAIN INTERACTION: CA Distance Between NBD and C-terminal Regions\")\nprint(\"=\" * 80)\n\ndef ca_centroid(first, stop):\n    \"\"\"Mean CA position over residues in range(first, stop) that have a CA atom.\"\"\"\n    return np.mean([ca_coords[r] for r in range(first, stop) if r in ca_coords], axis=0)\n\n# Are the NBD lobes and C-terminal region in close contact?\nnbd1_center = ca_centroid(100, 200)\nnbd2_center = ca_centroid(350, 480)\ncterm_center = ca_centroid(550, 680)\n\nd_nbd1_nbd2 = np.linalg.norm(nbd1_center - nbd2_center)\nd_nbd1_cterm = np.linalg.norm(nbd1_center - cterm_center)\nd_nbd2_cterm = np.linalg.norm(nbd2_center - cterm_center)\n\nprint(f\"\\n  Center-of-mass distances:\")\nprint(f\"    NBD Lobe I \u2194 NBD Lobe II: {d_nbd1_nbd2:.1f} \u00c5 (expected close for NBD)\")\nprint(f\"    NBD Lobe I \u2194 C-terminal:  {d_nbd1_cterm:.1f} \u00c5\")\nprint(f\"    NBD Lobe II \u2194 C-terminal: {d_nbd2_cterm:.1f} \u00c5\")\n\nif d_nbd2_cterm < 25:\n    print(\"\\n  \u2192 C-terminal region packs against NBD (close contact)\")\nelif d_nbd2_cterm < 40:\n    print(\"\\n  \u2192 C-terminal region has moderate proximity to NBD\")\nelse:\n    print(\"\\n  \u2192 C-terminal region is separated from NBD\")\n\n# ============================================================\n# Create comprehensive figure\n# ============================================================\ndef shade_regions(ax, alpha, with_labels=False):\n    \"\"\"Shade every region on ax; optionally write its short label above the data.\"\"\"\n    for _, label, start, end, color in REGIONS:\n        ax.axvspan(start, end, alpha=alpha, color=color)\n        if with_labels:\n            ax.text((start+end)/2, 103, label, ha='center', fontsize=8, fontweight='bold')\n\nfig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(14, 14))\n\n# Plot 1: pLDDT profile\nres_nums = [r for r in residues if r in ca_coords]\nplddts = [residue_data[r]['plddt'] for r in res_nums]\nax1.fill_between(res_nums, plddts, alpha=0.3, color='steelblue')\nax1.plot(res_nums, plddts, color='steelblue', linewidth=0.8)\nax1.axhline(y=90, color='green', linestyle='--', alpha=0.4)\nax1.axhline(y=70, color='orange', linestyle='--', alpha=0.4)\nax1.axhline(y=50, color='red', linestyle='--', alpha=0.4)\nshade_regions(ax1, 0.1, with_labels=True)\nax1.set_ylabel('pLDDT')\nax1.set_title('HSPA12B AlphaFold Confidence')\nax1.set_ylim(0, 110)\n\n# Plot 2: Secondary structure (one bar call for all residues instead of one per residue)\nss_colors = {'H': 'red', 'E': 'blue', 'C': 'gray'}\nax2.bar(residues, 1, color=[ss_colors[ss_assignment[r]] for r in residues], width=1, edgecolor='none')\nax2.set_ylabel('SS Type')\nax2.set_yticks([])\nax2.set_title('Estimated Secondary Structure (Red=Helix, Blue=Sheet, Gray=Coil)')\nshade_regions(ax2, 0.08)\n\n# Plot 3: CA-CA distance (helix indicator); guide lines use the i->i+3 scale of this axis\nh_res = sorted(helix_distances)\nh_vals = [helix_distances[r] for r in h_res]\nax3.plot(h_res, h_vals, color='darkred', linewidth=0.5, alpha=0.7)\nax3.axhline(y=HELIX_D3_MAX, color='red', linestyle='--', alpha=0.5, label=f'Helix cutoff (< {HELIX_D3_MAX} \u00c5)')\nax3.axhline(y=EXTENDED_D3_TYPICAL, color='blue', linestyle='--', alpha=0.5, label=f'Extended strand (~{EXTENDED_D3_TYPICAL:.0f} \u00c5)')\nax3.set_xlabel('Residue Number')\nax3.set_ylabel('CA(i)-CA(i+3) Distance (\u00c5)')\nax3.set_title('CA(i)\u2192CA(i+3) Distance Profile (Low=Helix, High=Extended)')\nax3.legend(fontsize=8)\nshade_regions(ax3, 0.08)\n\nplt.tight_layout()\nplt.savefig('hspa12b_structure_analysis.png', dpi=150, bbox_inches='tight')\nplt.show()\nprint(\"\\nFigure saved: hspa12b_structure_analysis.png\")\n",
  "plot_number": 2
}