{
  "filename": "cfap300_surface_analysis.png",
  "iteration": 2,
  "description": "Analyze CFAP300 surface properties to identify potential binding interfaces",
  "timestamp": "2026-06-19 22:59:45",
  "code": "\nimport requests\nimport json\nimport numpy as np\nimport matplotlib.pyplot as plt\n\n# Per-residue property profile of CFAP300 (UniProt Q9BRQ4) from its AlphaFold DB model.\n# No complex (AlphaFold-Multimer) prediction is run here: this cell downloads the model,\n# plots charge, hydropathy and pLDDT along the sequence, then scans for charged patches.\n# NOTE: solvent accessibility is not computed, so every residue is profiled, exposed or not.\n\n# Used only when the API response carries no pdbUrl; the version suffix can go stale.\nFALLBACK_PDB_URL = \"https://alphafold.ebi.ac.uk/files/AF-Q9BRQ4-F1-model_v6.pdb\"\nPATCH_WINDOW = 5  # residues per window in the charged-patch scan\nPATCH_MIN_CHARGED = 3  # acidic (or basic) residues needed to report a window\n\nurl = \"https://alphafold.ebi.ac.uk/api/prediction/Q9BRQ4\"\nheaders = {'User-Agent': 'Mozilla/5.0'}\nresponse = requests.get(url, headers=headers, timeout=30)\nresponse.raise_for_status()  # stop on HTTP errors instead of parsing an error page\naf_data = response.json()\nprint(\"AlphaFold entries for Q9BRQ4:\")\nfor entry in af_data:\n    print(f\"  Version: {entry.get('latestVersion')}\")\n    print(f\"  PDB URL: {entry.get('pdbUrl')}\")\n    print(f\"  Model type: {entry.get('modelType', 'N/A')}\")\n\n# Download the model the API points at, so the file tracks the current model version\npdb_url = next((entry.get('pdbUrl') for entry in af_data if entry.get('pdbUrl')), FALLBACK_PDB_URL)\nresponse = requests.get(pdb_url, headers=headers, timeout=30)\nresponse.raise_for_status()\npdb_text = response.text\n\n# Parse ATOM records using the fixed-column PDB layout\natoms = []\nfor line in pdb_text.splitlines():\n    if line.startswith('ATOM'):\n        atoms.append({\n            'atom_name': line[12:16].strip(),\n            'res_name': line[17:20].strip(),\n            'chain': line[21],\n            'res_id': int(line[22:26].strip()),\n            'x': float(line[30:38].strip()),\n            'y': float(line[38:46].strip()),\n            'z': float(line[46:54].strip()),\n            'plddt': float(line[60:66].strip()),  # B-factor column, read as pLDDT\n        })\n\n# One CA atom per residue gives the per-residue series plotted below\nca_atoms = [a for a in atoms if a['atom_name'] == 'CA']\nif not ca_atoms:\n    raise RuntimeError(f\"No CA atoms parsed from {pdb_url}\")\n\n# Charge assigned per residue type; residue types not listed count as 0\ncharge_map = {\n    'ASP': -1, 'GLU': -1,  # negative\n    'LYS': 1, 'ARG': 1, 'HIS': 0.5,  # positive\n}\n\nhydrophobic_map = {\n    'ALA': 1.8, 'VAL': 4.2, 'ILE': 4.5, 'LEU': 3.8, 'MET': 1.9,\n    'PHE': 2.8, 'TRP': -0.9, 'PRO': -1.6,\n    'GLY': -0.4, 'SER': -0.8, 'THR': -0.7, 'CYS': 2.5, 'TYR': -1.3,\n    'ASN': -3.5, 'GLN': -3.5, 'ASP': -3.5, 'GLU': -3.5,\n    'LYS': -3.9, 'ARG': -4.5, 'HIS': -3.2\n}\n\n# Per-residue series, all indexed like ca_atoms\ncharges = [charge_map.get(a['res_name'], 0) for a in ca_atoms]\nhydrophobicity = [hydrophobic_map.get(a['res_name'], 0) for a in ca_atoms]\nres_ids = [a['res_id'] for a in ca_atoms]\nplddt = [a['plddt'] for a in ca_atoms]\n\nfig, axes = plt.subplots(3, 1, figsize=(14, 10), sharex=True)\n\naxes[0].bar(res_ids, charges, color=['red' if c < 0 else 'blue' if c > 0 else 'gray' for c in charges], width=1)\naxes[0].set_ylabel('Charge')\naxes[0].set_title('CFAP300 Surface Property Analysis (AlphaFold Structure)')\naxes[0].axhline(y=0, color='black', linewidth=0.5)\n\n# Hydrophobic patches (sliding-window mean). Dividing by the number of residues actually\n# under the window keeps the termini a true local mean; a plain mode='same' average\n# treats positions past either end as zeros and drags the ends toward 0.\nwindow = 7\nkernel = np.ones(window)\nwindow_counts = np.convolve(np.ones(len(hydrophobicity)), kernel, mode='same')\nhydro_smooth = np.convolve(hydrophobicity, kernel, mode='same') / window_counts\naxes[1].fill_between(res_ids, 0, hydro_smooth, where=hydro_smooth > 0, alpha=0.5, color='orange', label='Hydrophobic')\naxes[1].fill_between(res_ids, 0, hydro_smooth, where=hydro_smooth <= 0, alpha=0.5, color='cyan', label='Hydrophilic')\naxes[1].set_ylabel('Hydrophobicity\\n(KD scale, 7-res window)')\naxes[1].legend(fontsize=8)\naxes[1].axhline(y=0, color='black', linewidth=0.5)\n\naxes[2].plot(res_ids, plddt, 'b-', linewidth=1.5)\naxes[2].fill_between(res_ids, 0, plddt, alpha=0.2, color='blue')\naxes[2].axhline(y=90, color='green', linestyle='--', alpha=0.5, label='Very high confidence')\naxes[2].axhline(y=70, color='orange', linestyle='--', alpha=0.5, label='Confident')\naxes[2].set_ylabel('pLDDT')\naxes[2].set_xlabel('Residue number')\naxes[2].legend(fontsize=8)\n\nplt.tight_layout()\nplt.savefig('cfap300_surface_analysis.png', dpi=150, bbox_inches='tight')\nplt.show()\n\n# Identify notable patches\nprint(\"\\nNotable surface features:\")\n\n# Charged clusters: both acidic and basic windows are reported, so the header names both\nprint(f\"\\nCharged patches ({PATCH_MIN_CHARGED}+ acidic or basic residues per {PATCH_WINDOW}-residue window):\")\ncfap300_seq = \"MATGELGDLGGYYFRFLPQKTFQSLSSKEITSRLRQWSMLGRIKAQAFGFDQTFQSYRKDDFVMAFFKDPNVIPNLKLLSDSSGQWIILGTEVKKIEAINVPCTQLSMSFFHRLYDEDIVRDSGHIVKCLDSFCDPFLISDELRRVLLVEDSEKYEIFSQPDREEFLFCLFKHLCLGGALCQYEDVISPYLETTKLIYKDLVSVRKNPQTKKIQITSSVFKVSAYDSAGMCYPSAKNHEQTFSYFIVDPIRRHLHVLYHCYGVGDMS\"\n\n# Stop at len - PATCH_WINDOW so every window is full length and the printed end\n# position never runs past the last residue.\nfor i in range(len(cfap300_seq) - PATCH_WINDOW + 1):\n    window_seq = cfap300_seq[i:i + PATCH_WINDOW]\n    acidic_count = sum(1 for aa in window_seq if aa in 'DE')\n    basic_count = sum(1 for aa in window_seq if aa in 'KR')\n    if acidic_count >= PATCH_MIN_CHARGED:\n        print(f\"  Position {i+1}-{i+PATCH_WINDOW}: {window_seq} ({acidic_count} acidic)\")\n    if basic_count >= PATCH_MIN_CHARGED:\n        print(f\"  Position {i+1}-{i+PATCH_WINDOW}: {window_seq} ({basic_count} basic)\")\n\n# All cysteine positions (1-based); conservation is not assessed in this cell\ncys_positions = [i+1 for i, aa in enumerate(cfap300_seq) if aa == 'C']\nprint(f\"\\nCysteine positions: {cys_positions}\")\nprint(f\"Total cysteines: {len(cys_positions)} in {len(cfap300_seq)} residues\")\n"
}