{
  "filename": "plot_1.png",
  "iteration": 1,
  "description": "Fetching and analyzing AlphaFold structural prediction and confidence scores for B7FXQ8 HSP20A",
  "timestamp": "2026-06-22 06:20:54",
  "code": "\nimport requests\nimport json\n\n# Seconds to wait for each HTTP response so a stalled server cannot hang the run.\nHTTP_TIMEOUT = 30\n\n# Check AlphaFold database for B7FXQ8\naf_url = \"https://alphafold.ebi.ac.uk/api/prediction/B7FXQ8\"\nresp = requests.get(af_url, timeout=HTTP_TIMEOUT)\nif resp.status_code == 200:\n    af_data = resp.json()\n    # The response may be a list of models; use the first one, falling back to\n    # an empty record so an empty list cannot raise IndexError.\n    if isinstance(af_data, list):\n        af_data = af_data[0] if af_data else {}\n    print(\"=== AlphaFold prediction available ===\")\n    print(f\"UniProt: {af_data.get('uniprotAccession', 'N/A')}\")\n    print(f\"Model URL: {af_data.get('pdbUrl', 'N/A')}\")\n    # cifUrl is the mmCIF coordinate file, not a separate confidence file.\n    print(f\"mmCIF URL: {af_data.get('cifUrl', 'N/A')}\")\n\n    # Download pLDDT scores from the PDB file\n    pdb_url = af_data.get('pdbUrl', '')\n    if pdb_url:\n        pdb_resp = requests.get(pdb_url, timeout=HTTP_TIMEOUT)\n        # Stop on an HTTP error instead of parsing an error page as coordinates.\n        pdb_resp.raise_for_status()\n        pdb_text = pdb_resp.text\n\n        # Extract pLDDT from B-factor column of ATOM records (one value per\n        # residue, taken from the CA atom at the fixed PDB column positions).\n        plddt_scores = []\n        residue_numbers = []\n        for line in pdb_text.split('\\n'):\n            if line.startswith('ATOM') and line[12:16].strip() == 'CA':\n                resnum = int(line[22:26].strip())\n                bfactor = float(line[60:66].strip())\n                plddt_scores.append(bfactor)\n                residue_numbers.append(resnum)\n\n        # Every statistic below is undefined without at least one CA atom.\n        if not plddt_scores:\n            raise ValueError(f\"No CA ATOM records found in {pdb_url}\")\n\n        import numpy as np\n        plddt_array = np.array(plddt_scores)\n        resnum_array = np.array(residue_numbers)\n\n        print(f\"\\n=== pLDDT confidence scores ===\")\n        print(f\"Mean pLDDT: {np.mean(plddt_array):.1f}\")\n        print(f\"Median pLDDT: {np.median(plddt_array):.1f}\")\n        print(f\"Min pLDDT: {np.min(plddt_array):.1f} at residue {resnum_array[np.argmin(plddt_array)]}\")\n        print(f\"Max pLDDT: {np.max(plddt_array):.1f} at residue {resnum_array[np.argmax(plddt_array)]}\")\n\n        # Per-region analysis (boolean masks on the residue numbers)\n        n_term = plddt_array[resnum_array <= 46]\n        acd = plddt_array[(resnum_array >= 47) & (resnum_array <= 155)]\n        c_term = plddt_array[resnum_array >= 156]\n\n        print(f\"\\nRegion-specific pLDDT:\")\n        print(f\"  N-terminal extension (1-46): {np.mean(n_term):.1f} \u00b1 {np.std(n_term):.1f}\")\n        print(f\"  ACD domain (47-155): {np.mean(acd):.1f} \u00b1 {np.std(acd):.1f}\")\n        print(f\"  C-terminal extension (156-163): {np.mean(c_term):.1f} \u00b1 {np.std(c_term):.1f}\")\n\n        # Confidence categories (bins are exclusive below, inclusive above)\n        very_high = np.sum(plddt_array > 90)\n        confident = np.sum((plddt_array > 70) & (plddt_array <= 90))\n        low = np.sum((plddt_array > 50) & (plddt_array <= 70))\n        very_low = np.sum(plddt_array <= 50)\n\n        print(f\"\\nConfidence distribution:\")\n        print(f\"  Very high (>90): {very_high}/{len(plddt_array)} ({100*very_high/len(plddt_array):.1f}%)\")\n        print(f\"  Confident (70-90): {confident}/{len(plddt_array)} ({100*confident/len(plddt_array):.1f}%)\")\n        print(f\"  Low (50-70): {low}/{len(plddt_array)} ({100*low/len(plddt_array):.1f}%)\")\n        print(f\"  Very low (<50): {very_low}/{len(plddt_array)} ({100*very_low/len(plddt_array):.1f}%)\")\n\n        # Plot\n        import matplotlib.pyplot as plt\n        fig, ax = plt.subplots(figsize=(10, 4))\n\n        # Color by confidence, using the same thresholds as the bins above\n        colors = []\n        for s in plddt_array:\n            if s > 90: colors.append('#0053D6')\n            elif s > 70: colors.append('#65CBF3')\n            elif s > 50: colors.append('#FFDB13')\n            else: colors.append('#FF7D45')\n\n        ax.bar(resnum_array, plddt_array, color=colors, width=1.0)\n        ax.axhline(y=70, color='gray', linestyle='--', alpha=0.5)\n        ax.axhline(y=90, color='gray', linestyle='--', alpha=0.5)\n\n        # Domain boundaries (drawn between residues 46/47 and 155/156)\n        ax.axvline(x=46.5, color='red', linestyle='-', alpha=0.7, label='Domain boundaries')\n        ax.axvline(x=155.5, color='red', linestyle='-', alpha=0.7)\n        # Without a legend the label set on the boundary line is never shown.\n        ax.legend(loc='lower left', fontsize=8)\n\n        ax.set_xlabel('Residue number')\n        ax.set_ylabel('pLDDT score')\n        ax.set_title('AlphaFold pLDDT Confidence - B7FXQ8 HSP20A (P. tricornutum)')\n        ax.set_ylim(0, 100)\n        ax.text(23, 95, 'N-term', ha='center', fontsize=9)\n        ax.text(101, 95, 'ACD domain', ha='center', fontsize=9)\n        ax.text(159, 95, 'C-term', ha='center', fontsize=9)\n\n        plt.tight_layout()\n        plt.savefig('alphafold_plddt_B7FXQ8.png', dpi=150)\n        plt.show()\n        print(\"\\nPlot saved.\")\n    else:\n        print(\"No PDB model URL in the AlphaFold response; skipping pLDDT analysis.\")\nelse:\n    print(f\"AlphaFold prediction not available (status: {resp.status_code})\")\n",
  "plot_number": 1
}