{
  "filename": "cfap300_alphafold_analysis.png",
  "iteration": 1,
  "description": "Analyze AlphaFold PAE matrix to identify domain boundaries in CFAP300",
  "timestamp": "2026-06-19 22:43:11",
  "code": "\nimport requests\nimport json\nimport matplotlib.pyplot as plt\nimport numpy as np\n\n# Download PAE matrix for CFAP300\npae_url = \"https://alphafold.ebi.ac.uk/files/AF-Q9BRQ4-F1-predicted_aligned_error_v6.json\"\nheaders = {'User-Agent': 'Mozilla/5.0'}\nresponse = requests.get(pae_url, headers=headers, timeout=30)\npae_data = response.json()\n\n# Parse PAE matrix\nif isinstance(pae_data, list) and len(pae_data) > 0:\n    pae_entry = pae_data[0]\n    pae_matrix = np.array(pae_entry.get('predicted_aligned_error', []))\n    \n    print(f\"PAE matrix shape: {pae_matrix.shape}\")\n    print(f\"Mean PAE: {np.mean(pae_matrix):.2f} \u00c5\")\n    print(f\"Median PAE: {np.median(pae_matrix):.2f} \u00c5\")\n    \n    # Plot PAE matrix\n    fig, axes = plt.subplots(1, 2, figsize=(14, 5))\n    \n    # PAE heatmap\n    im = axes[0].imshow(pae_matrix, cmap='Greens_r', vmin=0, vmax=30)\n    axes[0].set_xlabel('Scored residue')\n    axes[0].set_ylabel('Aligned residue')\n    axes[0].set_title('CFAP300 Predicted Aligned Error (PAE)')\n    plt.colorbar(im, ax=axes[0], label='PAE (\u00c5)')\n    \n    # pLDDT plot from PDB\n    pdb_url = \"https://alphafold.ebi.ac.uk/files/AF-Q9BRQ4-F1-model_v6.pdb\"\n    response2 = requests.get(pdb_url, headers=headers, timeout=30)\n    pdb_text = response2.text\n    \n    plddt_values = []\n    residue_ids = []\n    seen = set()\n    for line in pdb_text.split('\\n'):\n        if line.startswith('ATOM') and line[12:16].strip() == 'CA':\n            resid = int(line[22:26].strip())\n            if resid not in seen:\n                seen.add(resid)\n                plddt = float(line[60:66].strip())\n                plddt_values.append(plddt)\n                residue_ids.append(resid)\n    \n    axes[1].plot(residue_ids, plddt_values, 'b-', linewidth=1.5)\n    axes[1].axhline(y=90, color='green', linestyle='--', alpha=0.5, label='Very high (\u226590)')\n    axes[1].axhline(y=70, color='orange', linestyle='--', alpha=0.5, label='Confident (\u226570)')\n    axes[1].axhline(y=50, color='red', linestyle='--', alpha=0.5, label='Low (\u226550)')\n    axes[1].set_xlabel('Residue number')\n    axes[1].set_ylabel('pLDDT')\n    axes[1].set_title('CFAP300 AlphaFold Confidence (pLDDT)')\n    axes[1].set_ylim(0, 100)\n    axes[1].legend(fontsize=8)\n    axes[1].fill_between(residue_ids, 0, plddt_values, alpha=0.2, color='blue')\n    \n    plt.tight_layout()\n    plt.savefig('cfap300_alphafold_analysis.png', dpi=150, bbox_inches='tight')\n    plt.show()\n    \n    # Identify potential domain boundaries from PAE\n    # Low PAE within blocks = same domain; high PAE between blocks = different domains\n    block_size = 20\n    n_blocks = pae_matrix.shape[0] // block_size\n    print(f\"\\nIntra-block PAE analysis (block size={block_size}):\")\n    for i in range(n_blocks):\n        start = i * block_size\n        end = min((i + 1) * block_size, pae_matrix.shape[0])\n        block_pae = pae_matrix[start:end, start:end]\n        mean_pae = np.mean(block_pae)\n        print(f\"  Block {start+1}-{end}: mean PAE = {mean_pae:.2f} \u00c5\")\n    \n    # Check cross-block PAE to identify domain structure\n    print(f\"\\nCross-block PAE (indicators of domain boundaries):\")\n    for i in range(n_blocks-1):\n        for j in range(i+1, n_blocks):\n            s1, e1 = i*block_size, (i+1)*block_size\n            s2, e2 = j*block_size, min((j+1)*block_size, pae_matrix.shape[0])\n            cross_pae = np.mean(pae_matrix[s1:e1, s2:e2])\n            if cross_pae > 10:\n                print(f\"  Blocks {s1+1}-{e1} vs {s2+1}-{e2}: mean PAE = {cross_pae:.1f} \u00c5 (possible domain boundary)\")\n\nelse:\n    print(\"Could not parse PAE data\")\n    print(str(pae_data)[:500])\n"
}